LLVM  14.0.0
PPCISelLowering.cpp
Go to the documentation of this file.
1 //===-- PPCISelLowering.cpp - PPC DAG Lowering Implementation -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the PPCISelLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCISelLowering.h"
15 #include "PPC.h"
16 #include "PPCCCState.h"
17 #include "PPCCallingConv.h"
18 #include "PPCFrameLowering.h"
19 #include "PPCInstrInfo.h"
20 #include "PPCMachineFunctionInfo.h"
21 #include "PPCPerfectShuffle.h"
22 #include "PPCRegisterInfo.h"
23 #include "PPCSubtarget.h"
24 #include "PPCTargetMachine.h"
25 #include "llvm/ADT/APFloat.h"
26 #include "llvm/ADT/APInt.h"
27 #include "llvm/ADT/ArrayRef.h"
28 #include "llvm/ADT/DenseMap.h"
29 #include "llvm/ADT/None.h"
30 #include "llvm/ADT/STLExtras.h"
31 #include "llvm/ADT/SmallPtrSet.h"
32 #include "llvm/ADT/SmallSet.h"
33 #include "llvm/ADT/SmallVector.h"
34 #include "llvm/ADT/Statistic.h"
35 #include "llvm/ADT/StringRef.h"
36 #include "llvm/ADT/StringSwitch.h"
58 #include "llvm/IR/CallingConv.h"
59 #include "llvm/IR/Constant.h"
60 #include "llvm/IR/Constants.h"
61 #include "llvm/IR/DataLayout.h"
62 #include "llvm/IR/DebugLoc.h"
63 #include "llvm/IR/DerivedTypes.h"
64 #include "llvm/IR/Function.h"
65 #include "llvm/IR/GlobalValue.h"
66 #include "llvm/IR/IRBuilder.h"
67 #include "llvm/IR/Instructions.h"
68 #include "llvm/IR/Intrinsics.h"
69 #include "llvm/IR/IntrinsicsPowerPC.h"
70 #include "llvm/IR/Module.h"
71 #include "llvm/IR/Type.h"
72 #include "llvm/IR/Use.h"
73 #include "llvm/IR/Value.h"
74 #include "llvm/MC/MCContext.h"
75 #include "llvm/MC/MCExpr.h"
76 #include "llvm/MC/MCRegisterInfo.h"
77 #include "llvm/MC/MCSectionXCOFF.h"
78 #include "llvm/MC/MCSymbolXCOFF.h"
81 #include "llvm/Support/Casting.h"
82 #include "llvm/Support/CodeGen.h"
84 #include "llvm/Support/Compiler.h"
85 #include "llvm/Support/Debug.h"
87 #include "llvm/Support/Format.h"
88 #include "llvm/Support/KnownBits.h"
94 #include <algorithm>
95 #include <cassert>
96 #include <cstdint>
97 #include <iterator>
98 #include <list>
99 #include <utility>
100 #include <vector>
101 
102 using namespace llvm;
103 
104 #define DEBUG_TYPE "ppc-lowering"
105 
106 static cl::opt<bool> DisablePPCPreinc("disable-ppc-preinc",
107 cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden);
108 
109 static cl::opt<bool> DisableILPPref("disable-ppc-ilp-pref",
110 cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden);
111 
112 static cl::opt<bool> DisablePPCUnaligned("disable-ppc-unaligned",
113 cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden);
114 
115 static cl::opt<bool> DisableSCO("disable-ppc-sco",
116 cl::desc("disable sibling call optimization on ppc"), cl::Hidden);
117 
118 static cl::opt<bool> DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32",
119 cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden);
120 
121 static cl::opt<bool> UseAbsoluteJumpTables("ppc-use-absolute-jumptables",
122 cl::desc("use absolute jump tables on ppc"), cl::Hidden);
123 
125  "ppc-quadword-atomics",
126  cl::desc("enable quadword lock-free atomic operations"), cl::init(false),
127  cl::Hidden);
128 
129 STATISTIC(NumTailCalls, "Number of tail calls");
130 STATISTIC(NumSiblingCalls, "Number of sibling calls");
131 STATISTIC(ShufflesHandledWithVPERM, "Number of shuffles lowered to a VPERM");
132 STATISTIC(NumDynamicAllocaProbed, "Number of dynamic stack allocation probed");
133 
134 static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int);
135 
136 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl);
137 
138 static const char AIXSSPCanaryWordName[] = "__ssp_canary_word";
139 
140 // FIXME: Remove this once the bug has been fixed!
142 
144  const PPCSubtarget &STI)
145  : TargetLowering(TM), Subtarget(STI) {
146  // Initialize map that relates the PPC addressing modes to the computed flags
147  // of a load/store instruction. The map is used to determine the optimal
148  // addressing mode when selecting load and stores.
149  initializeAddrModeMap();
150  // On PPC32/64, arguments smaller than 4/8 bytes are extended, so all
151  // arguments are at least 4/8 bytes aligned.
152  bool isPPC64 = Subtarget.isPPC64();
153  setMinStackArgumentAlignment(isPPC64 ? Align(8) : Align(4));
154 
155  // Set up the register classes.
156  addRegisterClass(MVT::i32, &PPC::GPRCRegClass);
157  if (!useSoftFloat()) {
158  if (hasSPE()) {
159  addRegisterClass(MVT::f32, &PPC::GPRCRegClass);
160  // EFPU2 APU only supports f32
161  if (!Subtarget.hasEFPU2())
162  addRegisterClass(MVT::f64, &PPC::SPERCRegClass);
163  } else {
164  addRegisterClass(MVT::f32, &PPC::F4RCRegClass);
165  addRegisterClass(MVT::f64, &PPC::F8RCRegClass);
166  }
167  }
168 
169  // Match BITREVERSE to customized fast code sequence in the td file.
172 
173  // Sub-word ATOMIC_CMP_SWAP need to ensure that the input is zero-extended.
175 
176  // Custom lower inline assembly to check for special registers.
179 
180  // PowerPC has an i16 but no i8 (or i1) SEXTLOAD.
181  for (MVT VT : MVT::integer_valuetypes()) {
184  }
185 
186  if (Subtarget.isISA3_0()) {
191  } else {
192  // No extending loads from f16 or HW conversions back and forth.
201  }
202 
204 
205  // PowerPC has pre-inc load and store's.
216  if (!Subtarget.hasSPE()) {
221  }
222 
223  // PowerPC uses ADDC/ADDE/SUBC/SUBE to propagate carry.
224  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
225  for (MVT VT : ScalarIntVTs) {
230  }
231 
232  if (Subtarget.useCRBits()) {
234 
235  if (isPPC64 || Subtarget.hasFPCVT()) {
238  isPPC64 ? MVT::i64 : MVT::i32);
241  isPPC64 ? MVT::i64 : MVT::i32);
242 
245  isPPC64 ? MVT::i64 : MVT::i32);
248  isPPC64 ? MVT::i64 : MVT::i32);
249 
252  isPPC64 ? MVT::i64 : MVT::i32);
255  isPPC64 ? MVT::i64 : MVT::i32);
256 
259  isPPC64 ? MVT::i64 : MVT::i32);
262  isPPC64 ? MVT::i64 : MVT::i32);
263  } else {
268  }
269 
270  // PowerPC does not support direct load/store of condition registers.
273 
274  // FIXME: Remove this once the ANDI glue bug is fixed:
275  if (ANDIGlueBug)
277 
278  for (MVT VT : MVT::integer_valuetypes()) {
282  }
283 
284  addRegisterClass(MVT::i1, &PPC::CRBITRCRegClass);
285  }
286 
287  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
288  // PPC (the libcall is not available).
293 
294  // We do not currently implement these libm ops for PowerPC.
301 
302  // PowerPC has no SREM/UREM instructions unless we are on P9
303  // On P9 we may use a hardware instruction to compute the remainder.
304  // When the result of both the remainder and the division is required it is
305  // more efficient to compute the remainder from the result of the division
306  // rather than use the remainder instruction. The instructions are legalized
307  // directly because the DivRemPairsPass performs the transformation at the IR
308  // level.
309  if (Subtarget.isISA3_0()) {
314  } else {
319  }
320 
321  // Don't use SMUL_LOHI/UMUL_LOHI or SDIVREM/UDIVREM to lower SREM/UREM.
330 
331  // Handle constrained floating-point operations of scalar.
332  // TODO: Handle SPE specific operation.
338 
343 
344  if (!Subtarget.hasSPE()) {
347  }
348 
349  if (Subtarget.hasVSX()) {
352  }
353 
354  if (Subtarget.hasFSQRT()) {
357  }
358 
359  if (Subtarget.hasFPRND()) {
364 
369  }
370 
371  // We don't support sin/cos/sqrt/fmod/pow
382  if (Subtarget.hasSPE()) {
385  } else {
388  }
389 
390  if (Subtarget.hasSPE())
392 
394 
395  // If we're enabling GP optimizations, use hardware square root
396  if (!Subtarget.hasFSQRT() &&
397  !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTE() &&
398  Subtarget.hasFRE()))
400 
401  if (!Subtarget.hasFSQRT() &&
402  !(TM.Options.UnsafeFPMath && Subtarget.hasFRSQRTES() &&
403  Subtarget.hasFRES()))
405 
406  if (Subtarget.hasFCPSGN()) {
409  } else {
412  }
413 
414  if (Subtarget.hasFPRND()) {
419 
424  }
425 
426  // PowerPC does not have BSWAP, but we can use vector BSWAP instruction xxbrd
427  // to speed up scalar BSWAP64.
428  // CTPOP or CTTZ were introduced in P8/P9 respectively
430  if (Subtarget.hasP9Vector() && Subtarget.isPPC64())
432  else
434  if (Subtarget.isISA3_0()) {
437  } else {
440  }
441 
442  if (Subtarget.hasPOPCNTD() == PPCSubtarget::POPCNTD_Fast) {
445  } else {
448  }
449 
450  // PowerPC does not have ROTR
453 
454  if (!Subtarget.useCRBits()) {
455  // PowerPC does not have Select
460  }
461 
462  // PowerPC wants to turn select_cc of FP into fsel when possible.
465 
466  // PowerPC wants to optimize integer setcc a bit
467  if (!Subtarget.useCRBits())
469 
470  if (Subtarget.hasFPU()) {
474 
478  }
479 
480  // PowerPC does not have BRCOND which requires SetCC
481  if (!Subtarget.useCRBits())
483 
485 
486  if (Subtarget.hasSPE()) {
487  // SPE has built-in conversions
494 
495  // SPE supports signaling compare of f32/f64.
498  } else {
499  // PowerPC turns FP_TO_SINT into FCTIWZ and some load/stores.
502 
503  // PowerPC does not have [U|S]INT_TO_FP
508  }
509 
510  if (Subtarget.hasDirectMove() && isPPC64) {
515  if (TM.Options.UnsafeFPMath) {
524  }
525  } else {
530  }
531 
532  // We cannot sextinreg(i1). Expand to shifts.
534 
535  // NOTE: EH_SJLJ_SETJMP/_LONGJMP supported here is NOT intended to support
536  // SjLj exception handling but a light-weight setjmp/longjmp replacement to
537  // support continuation, user-level threading, and etc.. As a result, no
538  // other SjLj exception interfaces are implemented and please don't build
539  // your own exception handling based on them.
540  // LLVM/Clang supports zero-cost DWARF exception handling.
543 
544  // We want to legalize GlobalAddress and ConstantPool nodes into the
545  // appropriate instructions to materialize the address.
556 
557  // TRAP is legal.
559 
560  // TRAMPOLINE is custom lowered.
563 
564  // VASTART needs to be custom lowered to use the VarArgsFrameIndex
566 
567  if (Subtarget.is64BitELFABI()) {
568  // VAARG always uses double-word chunks, so promote anything smaller.
578  } else if (Subtarget.is32BitELFABI()) {
579  // VAARG is custom lowered with the 32-bit SVR4 ABI.
582  } else
584 
585  // VACOPY is custom lowered with the 32-bit SVR4 ABI.
586  if (Subtarget.is32BitELFABI())
588  else
590 
591  // Use the default implementation.
601 
602  // We want to custom lower some of our intrinsics.
606 
607  // To handle counter-based loop conditions.
609 
614 
615  // Comparisons that require checking two conditions.
616  if (Subtarget.hasSPE()) {
621  }
634 
637 
638  if (Subtarget.has64BitSupport()) {
639  // They also have instructions for converting between i64 and fp.
648  // This is just the low 32 bits of a (signed) fp->i64 conversion.
649  // We cannot do this with Promote because i64 is not a legal type.
652 
653  if (Subtarget.hasLFIWAX() || Subtarget.isPPC64()) {
656  }
657  } else {
658  // PowerPC does not have FP_TO_UINT on 32-bit implementations.
659  if (Subtarget.hasSPE()) {
662  } else {
665  }
666  }
667 
668  // With the instructions enabled under FPCVT, we can do everything.
669  if (Subtarget.hasFPCVT()) {
670  if (Subtarget.has64BitSupport()) {
679  }
680 
689  }
690 
691  if (Subtarget.use64BitRegs()) {
692  // 64-bit PowerPC implementations can support i64 types directly
693  addRegisterClass(MVT::i64, &PPC::G8RCRegClass);
694  // BUILD_PAIR can't be handled natively, and should be expanded to shl/or
696  // 64-bit PowerPC wants to expand i128 shifts itself.
700  } else {
701  // 32-bit PowerPC wants to expand i64 shifts itself.
705  }
706 
707  // PowerPC has better expansions for funnel shifts than the generic
708  // TargetLowering::expandFunnelShift.
709  if (Subtarget.has64BitSupport()) {
712  }
715 
716  if (Subtarget.hasVSX()) {
721  }
722 
723  if (Subtarget.hasAltivec()) {
724  for (MVT VT : { MVT::v16i8, MVT::v8i16, MVT::v4i32 }) {
729  }
730  // First set operation action for all vector types to expand. Then we
731  // will selectively turn on ones that can be effectively codegen'd.
732  for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
733  // add/sub are legal for all supported vector VT's.
736 
737  // For v2i64, these are only valid with P8Vector. This is corrected after
738  // the loop.
739  if (VT.getSizeInBits() <= 128 && VT.getScalarSizeInBits() <= 64) {
744  }
745  else {
750  }
751 
752  if (Subtarget.hasVSX()) {
755  }
756 
757  // Vector instructions introduced in P8
758  if (Subtarget.hasP8Altivec() && (VT.SimpleTy != MVT::v1i128)) {
761  }
762  else {
765  }
766 
767  // Vector instructions introduced in P9
768  if (Subtarget.hasP9Altivec() && (VT.SimpleTy != MVT::v1i128))
770  else
772 
773  // We promote all shuffles to v16i8.
776 
777  // We promote all non-typed operations to v4i32.
793 
794  // No other operations are legal.
832 
833  for (MVT InnerVT : MVT::fixedlen_vector_valuetypes()) {
834  setTruncStoreAction(VT, InnerVT, Expand);
835  setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
836  setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
837  setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
838  }
839  }
841  if (!Subtarget.hasP8Vector()) {
846  }
847 
848  // We can custom expand all VECTOR_SHUFFLEs to VPERM, others we can handle
849  // with merges, splats, etc.
851 
852  // Vector truncates to sub-word integer that fit in an Altivec/VSX register
853  // are cheap, so handle them before they get expanded to scalar.
859 
865  Subtarget.useCRBits() ? Legal : Expand);
879 
880  // Custom lowering ROTL v1i128 to VECTOR_SHUFFLE v16i8.
882  // With hasAltivec set, we can lower ISD::ROTL to vrl(b|h|w).
883  if (Subtarget.hasAltivec())
884  for (auto VT : {MVT::v4i32, MVT::v8i16, MVT::v16i8})
886  // With hasP8Altivec set, we can lower ISD::ROTL to vrld.
887  if (Subtarget.hasP8Altivec())
889 
890  addRegisterClass(MVT::v4f32, &PPC::VRRCRegClass);
891  addRegisterClass(MVT::v4i32, &PPC::VRRCRegClass);
892  addRegisterClass(MVT::v8i16, &PPC::VRRCRegClass);
893  addRegisterClass(MVT::v16i8, &PPC::VRRCRegClass);
894 
897 
898  if (Subtarget.hasVSX()) {
902  }
903 
904  if (Subtarget.hasP8Altivec())
906  else
908 
909  if (Subtarget.isISA3_1()) {
928  }
929 
932 
935 
940 
941  // Altivec does not contain unordered floating-point compare instructions
946 
947  if (Subtarget.hasVSX()) {
950  if (Subtarget.hasP8Vector()) {
953  }
954  if (Subtarget.hasDirectMove() && isPPC64) {
963  }
965 
966  // The nearbyint variants are not allowed to raise the inexact exception
967  // so we can only code-gen them with unsafe math.
968  if (TM.Options.UnsafeFPMath) {
971  }
972 
981 
987 
990 
993 
994  // Share the Altivec comparison restrictions.
999 
1002 
1004 
1005  if (Subtarget.hasP8Vector())
1006  addRegisterClass(MVT::f32, &PPC::VSSRCRegClass);
1007 
1008  addRegisterClass(MVT::f64, &PPC::VSFRCRegClass);
1009 
1010  addRegisterClass(MVT::v4i32, &PPC::VSRCRegClass);
1011  addRegisterClass(MVT::v4f32, &PPC::VSRCRegClass);
1012  addRegisterClass(MVT::v2f64, &PPC::VSRCRegClass);
1013 
1014  if (Subtarget.hasP8Altivec()) {
1018 
1019  // 128 bit shifts can be accomplished via 3 instructions for SHL and
1020  // SRL, but not for SRA because of the instructions available:
1021  // VS{RL} and VS{RL}O. However due to direct move costs, it's not worth
1022  // doing
1026 
1028  }
1029  else {
1033 
1035 
1036  // VSX v2i64 only supports non-arithmetic operations.
1039  }
1040 
1041  if (Subtarget.isISA3_1())
1043  else
1045 
1050 
1052 
1061 
1062  // Custom handling for partial vectors of integers converted to
1063  // floating point. We already have optimal handling for v2i32 through
1064  // the DAG combine, so those aren't necessary.
1081 
1088 
1091 
1092  // Handle constrained floating-point operations of vector.
1093  // The predictor is `hasVSX` because altivec instruction has
1094  // no exception but VSX vector instruction has.
1108 
1122 
1123  addRegisterClass(MVT::v2i64, &PPC::VSRCRegClass);
1124  addRegisterClass(MVT::f128, &PPC::VRRCRegClass);
1125 
1126  for (MVT FPT : MVT::fp_valuetypes())
1128 
1129  // Expand the SELECT to SELECT_CC
1131 
1134 
1135  // No implementation for these ops for PowerPC.
1141  }
1142 
1143  if (Subtarget.hasP8Altivec()) {
1144  addRegisterClass(MVT::v2i64, &PPC::VRRCRegClass);
1145  addRegisterClass(MVT::v1i128, &PPC::VRRCRegClass);
1146  }
1147 
1148  if (Subtarget.hasP9Vector()) {
1151 
1152  // 128 bit shifts can be accomplished via 3 instructions for SHL and
1153  // SRL, but not for SRA because of the instructions available:
1154  // VS{RL} and VS{RL}O.
1158 
1164 
1172 
1179 
1183 
1184  // Handle constrained floating-point operations of fp128
1205  } else if (Subtarget.hasVSX()) {
1208 
1211 
1212  // Set FADD/FSUB as libcall to avoid the legalizer to expand the
1213  // fp_to_uint and int_to_fp.
1216 
1224 
1225  // Expand the fp_extend if the target type is fp128.
1228 
1229  // Expand the fp_round if the source type is fp128.
1230  for (MVT VT : {MVT::f32, MVT::f64}) {
1233  }
1234 
1239 
1240  // Lower following f128 select_cc pattern:
1241  // select_cc x, y, tv, fv, cc -> select_cc (setcc x, y, cc), 0, tv, fv, NE
1243 
1244  // We need to handle f128 SELECT_CC with integer result type.
1247  }
1248 
1249  if (Subtarget.hasP9Altivec()) {
1250  if (Subtarget.isISA3_1()) {
1255  } else {
1258  }
1266  }
1267  }
1268 
1269  if (Subtarget.pairedVectorMemops()) {
1270  addRegisterClass(MVT::v256i1, &PPC::VSRpRCRegClass);
1273  }
1274  if (Subtarget.hasMMA()) {
1275  addRegisterClass(MVT::v512i1, &PPC::UACCRCRegClass);
1279  }
1280 
1281  if (Subtarget.has64BitSupport())
1283 
1284  if (Subtarget.isISA3_1())
1286 
1288 
1289  if (!isPPC64) {
1292  }
1293 
1294  if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics()) {
1299  }
1300 
1302 
1303  if (Subtarget.hasAltivec()) {
1304  // Altivec instructions set fields to all zeros or all ones.
1306  }
1307 
1308  if (!isPPC64) {
1309  // These libcalls are not available in 32-bit.
1310  setLibcallName(RTLIB::SHL_I128, nullptr);
1311  setLibcallName(RTLIB::SRL_I128, nullptr);
1312  setLibcallName(RTLIB::SRA_I128, nullptr);
1313  setLibcallName(RTLIB::MULO_I64, nullptr);
1314  }
1315 
1316  if (!isPPC64)
1318 
1319  setStackPointerRegisterToSaveRestore(isPPC64 ? PPC::X1 : PPC::R1);
1320 
1321  // We have target-specific dag combine patterns for the following nodes:
1330  if (Subtarget.hasFPCVT())
1335  if (Subtarget.useCRBits())
1341 
1345 
1348 
1349 
1350  if (Subtarget.useCRBits()) {
1354  }
1355 
1356  if (Subtarget.hasP9Altivec()) {
1359  }
1360 
1361  setLibcallName(RTLIB::LOG_F128, "logf128");
1362  setLibcallName(RTLIB::LOG2_F128, "log2f128");
1363  setLibcallName(RTLIB::LOG10_F128, "log10f128");
1364  setLibcallName(RTLIB::EXP_F128, "expf128");
1365  setLibcallName(RTLIB::EXP2_F128, "exp2f128");
1366  setLibcallName(RTLIB::SIN_F128, "sinf128");
1367  setLibcallName(RTLIB::COS_F128, "cosf128");
1368  setLibcallName(RTLIB::POW_F128, "powf128");
1369  setLibcallName(RTLIB::FMIN_F128, "fminf128");
1370  setLibcallName(RTLIB::FMAX_F128, "fmaxf128");
1371  setLibcallName(RTLIB::REM_F128, "fmodf128");
1372  setLibcallName(RTLIB::SQRT_F128, "sqrtf128");
1373  setLibcallName(RTLIB::CEIL_F128, "ceilf128");
1374  setLibcallName(RTLIB::FLOOR_F128, "floorf128");
1375  setLibcallName(RTLIB::TRUNC_F128, "truncf128");
1376  setLibcallName(RTLIB::ROUND_F128, "roundf128");
1377  setLibcallName(RTLIB::LROUND_F128, "lroundf128");
1378  setLibcallName(RTLIB::LLROUND_F128, "llroundf128");
1379  setLibcallName(RTLIB::RINT_F128, "rintf128");
1380  setLibcallName(RTLIB::LRINT_F128, "lrintf128");
1381  setLibcallName(RTLIB::LLRINT_F128, "llrintf128");
1382  setLibcallName(RTLIB::NEARBYINT_F128, "nearbyintf128");
1383  setLibcallName(RTLIB::FMA_F128, "fmaf128");
1384 
1385  // With 32 condition bits, we don't need to sink (and duplicate) compares
1386  // aggressively in CodeGenPrep.
1387  if (Subtarget.useCRBits()) {
1390  }
1391 
1393 
1394  switch (Subtarget.getCPUDirective()) {
1395  default: break;
1396  case PPC::DIR_970:
1397  case PPC::DIR_A2:
1398  case PPC::DIR_E500:
1399  case PPC::DIR_E500mc:
1400  case PPC::DIR_E5500:
1401  case PPC::DIR_PWR4:
1402  case PPC::DIR_PWR5:
1403  case PPC::DIR_PWR5X:
1404  case PPC::DIR_PWR6:
1405  case PPC::DIR_PWR6X:
1406  case PPC::DIR_PWR7:
1407  case PPC::DIR_PWR8:
1408  case PPC::DIR_PWR9:
1409  case PPC::DIR_PWR10:
1410  case PPC::DIR_PWR_FUTURE:
1413  break;
1414  }
1415 
1416  if (Subtarget.enableMachineScheduler())
1418  else
1420 
1422 
1423  // The Freescale cores do better with aggressive inlining of memcpy and
1424  // friends. GCC uses same threshold of 128 bytes (= 32 word stores).
1425  if (Subtarget.getCPUDirective() == PPC::DIR_E500mc ||
1426  Subtarget.getCPUDirective() == PPC::DIR_E5500) {
1427  MaxStoresPerMemset = 32;
1429  MaxStoresPerMemcpy = 32;
1431  MaxStoresPerMemmove = 32;
1433  } else if (Subtarget.getCPUDirective() == PPC::DIR_A2) {
1434  // The A2 also benefits from (very) aggressive inlining of memcpy and
1435  // friends. The overhead of a the function call, even when warm, can be
1436  // over one hundred cycles.
1437  MaxStoresPerMemset = 128;
1438  MaxStoresPerMemcpy = 128;
1439  MaxStoresPerMemmove = 128;
1440  MaxLoadsPerMemcmp = 128;
1441  } else {
1442  MaxLoadsPerMemcmp = 8;
1444  }
1445 
1446  IsStrictFPEnabled = true;
1447 
1448  // Let the subtarget (CPU) decide if a predictable select is more expensive
1449  // than the corresponding branch. This information is used in CGP to decide
1450  // when to convert selects into branches.
1452 }
1453 
1454 // *********************************** NOTE ************************************
1455 // For selecting load and store instructions, the addressing modes are defined
1456 // as ComplexPatterns in PPCInstrInfo.td, which are then utilized in the TD
1457 // patterns to match the load the store instructions.
1458 //
1459 // The TD definitions for the addressing modes correspond to their respective
1460 // Select<AddrMode>Form() function in PPCISelDAGToDAG.cpp. These functions rely
1461 // on SelectOptimalAddrMode(), which calls computeMOFlags() to compute the
1462 // address mode flags of a particular node. Afterwards, the computed address
1463 // flags are passed into getAddrModeForFlags() in order to retrieve the optimal
1464 // addressing mode. SelectOptimalAddrMode() then sets the Base and Displacement
1465 // accordingly, based on the preferred addressing mode.
1466 //
1467 // Within PPCISelLowering.h, there are two enums: MemOpFlags and AddrMode.
1468 // MemOpFlags contains all the possible flags that can be used to compute the
1469 // optimal addressing mode for load and store instructions.
1470 // AddrMode contains all the possible load and store addressing modes available
1471 // on Power (such as DForm, DSForm, DQForm, XForm, etc.)
1472 //
1473 // When adding new load and store instructions, it is possible that new address
1474 // flags may need to be added into MemOpFlags, and a new addressing mode will
1475 // need to be added to AddrMode. An entry of the new addressing mode (consisting
1476 // of the minimal and main distinguishing address flags for the new load/store
1477 // instructions) will need to be added into initializeAddrModeMap() below.
1478 // Finally, when adding new addressing modes, the getAddrModeForFlags() will
1479 // need to be updated to account for selecting the optimal addressing mode.
1480 // *****************************************************************************
1481 /// Initialize the map that relates the different addressing modes of the load
1482 /// and store instructions to a set of flags. This ensures the load/store
1483 /// instruction is correctly matched during instruction selection.
// Maps each PPC addressing mode (D-Form, DS-Form, DQ-Form, prefixed D-Form)
// to the MemOpFlags combinations that should select it; the comments name the
// load/store instructions each flag set corresponds to.
// NOTE(review): the flag-list initializer entries are elided in this view
// (only one, PPC::MOF_RPlusSImm34, is visible) — consult the full source.
1484 void PPCTargetLowering::initializeAddrModeMap() {
1485  AddrModesMap[PPC::AM_DForm] = {
1486  // LWZ, STW
1491  // LBZ, LHZ, STB, STH
1496  // LHA
1501  // LFS, LFD, STFS, STFD
1506  };
1507  AddrModesMap[PPC::AM_DSForm] = {
1508  // LWA
1512  // LD, STD
1516  // DFLOADf32, DFLOADf64, DSTOREf32, DSTOREf64
1520  };
1521  AddrModesMap[PPC::AM_DQForm] = {
1522  // LXV, STXV
1526  };
1527  AddrModesMap[PPC::AM_PrefixDForm] = {PPC::MOF_RPlusSImm34 |
1529  // TODO: Add mapping for quadword load/store.
1530 }
1531 
1532 /// getMaxByValAlign - Helper for getByValTypeAlignment to determine
1533 /// the desired ByVal argument alignment.
1534 static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign) {
1535  if (MaxAlign == MaxMaxAlign)
1536  return;
1537  if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
1538  if (MaxMaxAlign >= 32 &&
1539  VTy->getPrimitiveSizeInBits().getFixedSize() >= 256)
1540  MaxAlign = Align(32);
1541  else if (VTy->getPrimitiveSizeInBits().getFixedSize() >= 128 &&
1542  MaxAlign < 16)
1543  MaxAlign = Align(16);
1544  } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
1545  Align EltAlign;
1546  getMaxByValAlign(ATy->getElementType(), EltAlign, MaxMaxAlign);
1547  if (EltAlign > MaxAlign)
1548  MaxAlign = EltAlign;
1549  } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
1550  for (auto *EltTy : STy->elements()) {
1551  Align EltAlign;
1552  getMaxByValAlign(EltTy, EltAlign, MaxMaxAlign);
1553  if (EltAlign > MaxAlign)
1554  MaxAlign = EltAlign;
1555  if (MaxAlign == MaxMaxAlign)
1556  break;
1557  }
1558  }
1559 }
1560 
1561 /// getByValTypeAlignment - Return the desired alignment for ByVal aggregate
1562 /// function arguments in the caller parameter area.
1564  const DataLayout &DL) const {
1565  // 16byte and wider vectors are passed on 16byte boundary.
1566  // The rest is 8 on PPC64 and 4 on PPC32 boundary.
1567  Align Alignment = Subtarget.isPPC64() ? Align(8) : Align(4);
1568  if (Subtarget.hasAltivec())
1569  getMaxByValAlign(Ty, Alignment, Align(16));
1570  return Alignment.value();
1571 }
1572 
1574  return Subtarget.useSoftFloat();
1575 }
1576 
1578  return Subtarget.hasSPE();
1579 }
1580 
1582  return VT.isScalarInteger();
1583 }
1584 
1585 const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
1586  switch ((PPCISD::NodeType)Opcode) {
1587  case PPCISD::FIRST_NUMBER: break;
1588  case PPCISD::FSEL: return "PPCISD::FSEL";
1589  case PPCISD::XSMAXCDP: return "PPCISD::XSMAXCDP";
1590  case PPCISD::XSMINCDP: return "PPCISD::XSMINCDP";
1591  case PPCISD::FCFID: return "PPCISD::FCFID";
1592  case PPCISD::FCFIDU: return "PPCISD::FCFIDU";
1593  case PPCISD::FCFIDS: return "PPCISD::FCFIDS";
1594  case PPCISD::FCFIDUS: return "PPCISD::FCFIDUS";
1595  case PPCISD::FCTIDZ: return "PPCISD::FCTIDZ";
1596  case PPCISD::FCTIWZ: return "PPCISD::FCTIWZ";
1597  case PPCISD::FCTIDUZ: return "PPCISD::FCTIDUZ";
1598  case PPCISD::FCTIWUZ: return "PPCISD::FCTIWUZ";
1600  return "PPCISD::FP_TO_UINT_IN_VSR,";
1602  return "PPCISD::FP_TO_SINT_IN_VSR";
1603  case PPCISD::FRE: return "PPCISD::FRE";
1604  case PPCISD::FRSQRTE: return "PPCISD::FRSQRTE";
1605  case PPCISD::FTSQRT:
1606  return "PPCISD::FTSQRT";
1607  case PPCISD::FSQRT:
1608  return "PPCISD::FSQRT";
1609  case PPCISD::STFIWX: return "PPCISD::STFIWX";
1610  case PPCISD::VPERM: return "PPCISD::VPERM";
1611  case PPCISD::XXSPLT: return "PPCISD::XXSPLT";
1613  return "PPCISD::XXSPLTI_SP_TO_DP";
1614  case PPCISD::XXSPLTI32DX:
1615  return "PPCISD::XXSPLTI32DX";
1616  case PPCISD::VECINSERT: return "PPCISD::VECINSERT";
1617  case PPCISD::XXPERMDI: return "PPCISD::XXPERMDI";
1618  case PPCISD::VECSHL: return "PPCISD::VECSHL";
1619  case PPCISD::CMPB: return "PPCISD::CMPB";
1620  case PPCISD::Hi: return "PPCISD::Hi";
1621  case PPCISD::Lo: return "PPCISD::Lo";
1622  case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
1623  case PPCISD::ATOMIC_CMP_SWAP_8: return "PPCISD::ATOMIC_CMP_SWAP_8";
1624  case PPCISD::ATOMIC_CMP_SWAP_16: return "PPCISD::ATOMIC_CMP_SWAP_16";
1625  case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
1626  case PPCISD::DYNAREAOFFSET: return "PPCISD::DYNAREAOFFSET";
1627  case PPCISD::PROBED_ALLOCA: return "PPCISD::PROBED_ALLOCA";
1628  case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
1629  case PPCISD::SRL: return "PPCISD::SRL";
1630  case PPCISD::SRA: return "PPCISD::SRA";
1631  case PPCISD::SHL: return "PPCISD::SHL";
1632  case PPCISD::SRA_ADDZE: return "PPCISD::SRA_ADDZE";
1633  case PPCISD::CALL: return "PPCISD::CALL";
1634  case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
1635  case PPCISD::CALL_NOTOC: return "PPCISD::CALL_NOTOC";
1636  case PPCISD::CALL_RM:
1637  return "PPCISD::CALL_RM";
1638  case PPCISD::CALL_NOP_RM:
1639  return "PPCISD::CALL_NOP_RM";
1640  case PPCISD::CALL_NOTOC_RM:
1641  return "PPCISD::CALL_NOTOC_RM";
1642  case PPCISD::MTCTR: return "PPCISD::MTCTR";
1643  case PPCISD::BCTRL: return "PPCISD::BCTRL";
1644  case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
1645  case PPCISD::BCTRL_RM:
1646  return "PPCISD::BCTRL_RM";
1648  return "PPCISD::BCTRL_LOAD_TOC_RM";
1649  case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
1650  case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
1651  case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
1652  case PPCISD::EH_SJLJ_LONGJMP: return "PPCISD::EH_SJLJ_LONGJMP";
1653  case PPCISD::MFOCRF: return "PPCISD::MFOCRF";
1654  case PPCISD::MFVSR: return "PPCISD::MFVSR";
1655  case PPCISD::MTVSRA: return "PPCISD::MTVSRA";
1656  case PPCISD::MTVSRZ: return "PPCISD::MTVSRZ";
1657  case PPCISD::SINT_VEC_TO_FP: return "PPCISD::SINT_VEC_TO_FP";
1658  case PPCISD::UINT_VEC_TO_FP: return "PPCISD::UINT_VEC_TO_FP";
1660  return "PPCISD::SCALAR_TO_VECTOR_PERMUTED";
1662  return "PPCISD::ANDI_rec_1_EQ_BIT";
1664  return "PPCISD::ANDI_rec_1_GT_BIT";
1665  case PPCISD::VCMP: return "PPCISD::VCMP";
1666  case PPCISD::VCMP_rec: return "PPCISD::VCMP_rec";
1667  case PPCISD::LBRX: return "PPCISD::LBRX";
1668  case PPCISD::STBRX: return "PPCISD::STBRX";
1669  case PPCISD::LFIWAX: return "PPCISD::LFIWAX";
1670  case PPCISD::LFIWZX: return "PPCISD::LFIWZX";
1671  case PPCISD::LXSIZX: return "PPCISD::LXSIZX";
1672  case PPCISD::STXSIX: return "PPCISD::STXSIX";
1673  case PPCISD::VEXTS: return "PPCISD::VEXTS";
1674  case PPCISD::LXVD2X: return "PPCISD::LXVD2X";
1675  case PPCISD::STXVD2X: return "PPCISD::STXVD2X";
1676  case PPCISD::LOAD_VEC_BE: return "PPCISD::LOAD_VEC_BE";
1677  case PPCISD::STORE_VEC_BE: return "PPCISD::STORE_VEC_BE";
1679  return "PPCISD::ST_VSR_SCAL_INT";
1680  case PPCISD::COND_BRANCH: return "PPCISD::COND_BRANCH";
1681  case PPCISD::BDNZ: return "PPCISD::BDNZ";
1682  case PPCISD::BDZ: return "PPCISD::BDZ";
1683  case PPCISD::MFFS: return "PPCISD::MFFS";
1684  case PPCISD::FADDRTZ: return "PPCISD::FADDRTZ";
1685  case PPCISD::TC_RETURN: return "PPCISD::TC_RETURN";
1686  case PPCISD::CR6SET: return "PPCISD::CR6SET";
1687  case PPCISD::CR6UNSET: return "PPCISD::CR6UNSET";
1688  case PPCISD::PPC32_GOT: return "PPCISD::PPC32_GOT";
1689  case PPCISD::PPC32_PICGOT: return "PPCISD::PPC32_PICGOT";
1690  case PPCISD::ADDIS_GOT_TPREL_HA: return "PPCISD::ADDIS_GOT_TPREL_HA";
1691  case PPCISD::LD_GOT_TPREL_L: return "PPCISD::LD_GOT_TPREL_L";
1692  case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
1693  case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
1694  case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
1695  case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
1696  case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
1697  case PPCISD::TLSGD_AIX: return "PPCISD::TLSGD_AIX";
1698  case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
1699  case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
1700  case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
1701  case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
1702  case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
1703  case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
1704  case PPCISD::PADDI_DTPREL:
1705  return "PPCISD::PADDI_DTPREL";
1706  case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
1707  case PPCISD::SC: return "PPCISD::SC";
1708  case PPCISD::CLRBHRB: return "PPCISD::CLRBHRB";
1709  case PPCISD::MFBHRBE: return "PPCISD::MFBHRBE";
1710  case PPCISD::RFEBB: return "PPCISD::RFEBB";
1711  case PPCISD::XXSWAPD: return "PPCISD::XXSWAPD";
1712  case PPCISD::SWAP_NO_CHAIN: return "PPCISD::SWAP_NO_CHAIN";
1713  case PPCISD::VABSD: return "PPCISD::VABSD";
1714  case PPCISD::BUILD_FP128: return "PPCISD::BUILD_FP128";
1715  case PPCISD::BUILD_SPE64: return "PPCISD::BUILD_SPE64";
1716  case PPCISD::EXTRACT_SPE: return "PPCISD::EXTRACT_SPE";
1717  case PPCISD::EXTSWSLI: return "PPCISD::EXTSWSLI";
1718  case PPCISD::LD_VSX_LH: return "PPCISD::LD_VSX_LH";
1719  case PPCISD::FP_EXTEND_HALF: return "PPCISD::FP_EXTEND_HALF";
1720  case PPCISD::MAT_PCREL_ADDR: return "PPCISD::MAT_PCREL_ADDR";
1722  return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
1724  return "PPCISD::TLS_LOCAL_EXEC_MAT_ADDR";
1725  case PPCISD::ACC_BUILD: return "PPCISD::ACC_BUILD";
1726  case PPCISD::PAIR_BUILD: return "PPCISD::PAIR_BUILD";
1727  case PPCISD::EXTRACT_VSX_REG: return "PPCISD::EXTRACT_VSX_REG";
1728  case PPCISD::XXMFACC: return "PPCISD::XXMFACC";
1729  case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
1730  case PPCISD::ZEXT_LD_SPLAT: return "PPCISD::ZEXT_LD_SPLAT";
1731  case PPCISD::SEXT_LD_SPLAT: return "PPCISD::SEXT_LD_SPLAT";
1732  case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
1734  return "PPCISD::STRICT_FADDRTZ";
1735  case PPCISD::STRICT_FCTIDZ:
1736  return "PPCISD::STRICT_FCTIDZ";
1737  case PPCISD::STRICT_FCTIWZ:
1738  return "PPCISD::STRICT_FCTIWZ";
1740  return "PPCISD::STRICT_FCTIDUZ";
1742  return "PPCISD::STRICT_FCTIWUZ";
1743  case PPCISD::STRICT_FCFID:
1744  return "PPCISD::STRICT_FCFID";
1745  case PPCISD::STRICT_FCFIDU:
1746  return "PPCISD::STRICT_FCFIDU";
1747  case PPCISD::STRICT_FCFIDS:
1748  return "PPCISD::STRICT_FCFIDS";
1750  return "PPCISD::STRICT_FCFIDUS";
1751  case PPCISD::LXVRZX: return "PPCISD::LXVRZX";
1752  }
1753  return nullptr;
1754 }
1755 
1757  EVT VT) const {
1758  if (!VT.isVector())
1759  return Subtarget.useCRBits() ? MVT::i1 : MVT::i32;
1760 
1762 }
1763 
1765  assert(VT.isFloatingPoint() && "Non-floating-point FMA?");
1766  return true;
1767 }
1768 
1769 //===----------------------------------------------------------------------===//
1770 // Node matching predicates, for use by the tblgen matching code.
1771 //===----------------------------------------------------------------------===//
1772 
/// isFloatingPointZero - Return true if this is 0.0 or -0.0.
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op))
    return CFP->getValueAPF().isZero();
  else if (ISD::isEXTLoad(Op.getNode()) || ISD::isNON_EXTLoad(Op.getNode())) {
    // Maybe this has already been legalized into the constant pool?
    // Operand 1 of a load is its address; if it is a constant-pool address
    // holding an FP constant, test that constant directly.
    if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op.getOperand(1)))
      if (const ConstantFP *CFP = dyn_cast<ConstantFP>(CP->getConstVal()))
        return CFP->getValueAPF().isZero();
  }
  // Neither an FP constant nor a load of one from the constant pool.
  return false;
}
1785 
/// isConstantOrUndef - Op is either an undef node or a ConstantSDNode. Return
/// true if Op is undef or if it matches the specified value.
static bool isConstantOrUndef(int Op, int Val) {
  // Undef mask elements are encoded as negative values and match anything.
  if (Op < 0)
    return true;
  return Op == Val;
}
1791 
/// isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUHUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
                             SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    // BE, two inputs: result byte i must be byte 2*i+1 (or undef).
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+1))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    // LE, two (swapped) inputs: result byte i must be byte 2*i (or undef).
    for (unsigned i = 0; i != 16; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary (both inputs identical): the two halves of the mask must select
    // the same bytes, with the byte offset chosen by endianness.
    unsigned j = IsLE ? 0 : 1;
    for (unsigned i = 0; i != 8; ++i)
      if (!isConstantOrUndef(N->getMaskElt(i), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j))
        return false;
  }
  // Any other ShuffleKind value falls through and reports a match.
  return true;
}
1822 
/// isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUWUM instruction.
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
                             SelectionDAG &DAG) {
  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    // BE, two inputs: result halfword i/2 must be bytes 2*i+2, 2*i+3.
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+3))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    // LE, two (swapped) inputs: result halfword i/2 must be bytes 2*i, 2*i+1.
    for (unsigned i = 0; i != 16; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary: both halves of the mask must select the same halfwords.
    unsigned j = IsLE ? 0 : 2;
    for (unsigned i = 0; i != 8; i += 2)
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1))
        return false;
  }
  return true;
}
1857 
/// isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a
/// VPKUDUM instruction, AND the VPKUDUM instruction exists for the
/// current subtarget.
///
/// The ShuffleKind distinguishes between big-endian operations with
/// two different inputs (0), either-endian operations with two identical
/// inputs (1), and little-endian operations with two different inputs (2).
/// For the latter, the input operands are swapped (see PPCInstrAltivec.td).
                             SelectionDAG &DAG) {
  const PPCSubtarget& Subtarget =
      static_cast<const PPCSubtarget&>(DAG.getSubtarget());
  // VPKUDUM is only available on subtargets with POWER8 vector support.
  if (!Subtarget.hasP8Vector())
    return false;

  bool IsLE = DAG.getDataLayout().isLittleEndian();
  if (ShuffleKind == 0) {
    if (IsLE)
      return false;
    // BE, two inputs: result word i/4 must be bytes 2*i+4 .. 2*i+7.
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2+4) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+5) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+6) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+7))
        return false;
  } else if (ShuffleKind == 2) {
    if (!IsLE)
      return false;
    // LE, two (swapped) inputs: result word i/4 must be bytes 2*i .. 2*i+3.
    for (unsigned i = 0; i != 16; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+3))
        return false;
  } else if (ShuffleKind == 1) {
    // Unary: both halves of the mask must select the same words.
    unsigned j = IsLE ? 0 : 4;
    for (unsigned i = 0; i != 8; i += 4)
      if (!isConstantOrUndef(N->getMaskElt(i ), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+1), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+2), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+3), i*2+j+3) ||
          !isConstantOrUndef(N->getMaskElt(i+8), i*2+j) ||
          !isConstantOrUndef(N->getMaskElt(i+9), i*2+j+1) ||
          !isConstantOrUndef(N->getMaskElt(i+10), i*2+j+2) ||
          !isConstantOrUndef(N->getMaskElt(i+11), i*2+j+3))
        return false;
  }
  return true;
}
1907 
1908 /// isVMerge - Common function, used to match vmrg* shuffles.
1909 ///
1910 static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize,
1911  unsigned LHSStart, unsigned RHSStart) {
1912  if (N->getValueType(0) != MVT::v16i8)
1913  return false;
1914  assert((UnitSize == 1 || UnitSize == 2 || UnitSize == 4) &&
1915  "Unsupported merge size!");
1916 
1917  for (unsigned i = 0; i != 8/UnitSize; ++i) // Step over units
1918  for (unsigned j = 0; j != UnitSize; ++j) { // Step over bytes within unit
1919  if (!isConstantOrUndef(N->getMaskElt(i*UnitSize*2+j),
1920  LHSStart+j+i*UnitSize) ||
1921  !isConstantOrUndef(N->getMaskElt(i*UnitSize*2+UnitSize+j),
1922  RHSStart+j+i*UnitSize))
1923  return false;
1924  }
1925  return true;
1926 }
1927 
/// isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGL* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    // LE: the merged units start at byte 0 of each input (see isVMerge).
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  } else {
    // BE: the merged units start at byte 8 of each input.
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  }
}
1952 
/// isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for
/// a VMRGH* instruction with the specified unit size (1,2 or 4 bytes).
/// The ShuffleKind distinguishes between big-endian merges with two
/// different inputs (0), either-endian merges with two identical inputs (1),
/// and little-endian merges with two different inputs (2). For the latter,
/// the input operands are swapped (see PPCInstrAltivec.td).
                             unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    // LE: the merged units start at byte 8 of each input (see isVMerge) —
    // the mirror image of the VMRGL case above.
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 8, 8);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, UnitSize, 8, 24);
    else
      return false;
  } else {
    // BE: the merged units start at byte 0 of each input.
    if (ShuffleKind == 1) // unary
      return isVMerge(N, UnitSize, 0, 0);
    else if (ShuffleKind == 0) // normal
      return isVMerge(N, UnitSize, 0, 16);
    else
      return false;
  }
}
1977 
1978 /**
1979  * Common function used to match vmrgew and vmrgow shuffles
1980  *
1981  * The indexOffset determines whether to look for even or odd words in
 * the shuffle mask. This is based on the endianness of the target
1983  * machine.
1984  * - Little Endian:
1985  * - Use offset of 0 to check for odd elements
1986  * - Use offset of 4 to check for even elements
1987  * - Big Endian:
1988  * - Use offset of 0 to check for even elements
1989  * - Use offset of 4 to check for odd elements
1990  * A detailed description of the vector element ordering for little endian and
1991  * big endian can be found at
1992  * http://www.ibm.com/developerworks/library/l-ibm-xl-c-cpp-compiler/index.html
1993  * Targeting your applications - what little endian and big endian IBM XL C/C++
1994  * compiler differences mean to you
1995  *
1996  * The mask to the shuffle vector instruction specifies the indices of the
1997  * elements from the two input vectors to place in the result. The elements are
1998  * numbered in array-access order, starting with the first vector. These vectors
1999  * are always of type v16i8, thus each vector will contain 16 elements of size
 * 8 bits. More info on the shuffle vector can be found in the
2001  * http://llvm.org/docs/LangRef.html#shufflevector-instruction
2002  * Language Reference.
2003  *
2004  * The RHSStartValue indicates whether the same input vectors are used (unary)
2005  * or two different input vectors are used, based on the following:
2006  * - If the instruction uses the same vector for both inputs, the range of the
2007  * indices will be 0 to 15. In this case, the RHSStart value passed should
2008  * be 0.
2009  * - If the instruction has two different vectors then the range of the
2010  * indices will be 0 to 31. In this case, the RHSStart value passed should
2011  * be 16 (indices 0-15 specify elements in the first vector while indices 16
2012  * to 31 specify elements in the second vector).
2013  *
2014  * \param[in] N The shuffle vector SD Node to analyze
2015  * \param[in] IndexOffset Specifies whether to look for even or odd elements
2016  * \param[in] RHSStartValue Specifies the starting index for the righthand input
2017  * vector to the shuffle_vector instruction
2018  * \return true iff this shuffle vector represents an even or odd word merge
2019  */
2020 static bool isVMerge(ShuffleVectorSDNode *N, unsigned IndexOffset,
2021  unsigned RHSStartValue) {
2022  if (N->getValueType(0) != MVT::v16i8)
2023  return false;
2024 
2025  for (unsigned i = 0; i < 2; ++i)
2026  for (unsigned j = 0; j < 4; ++j)
2027  if (!isConstantOrUndef(N->getMaskElt(i*4+j),
2028  i*RHSStartValue+j+IndexOffset) ||
2029  !isConstantOrUndef(N->getMaskElt(i*4+j+8),
2030  i*RHSStartValue+j+IndexOffset+8))
2031  return false;
2032  return true;
2033 }
2034 
2035 /**
2036  * Determine if the specified shuffle mask is suitable for the vmrgew or
2037  * vmrgow instructions.
2038  *
2039  * \param[in] N The shuffle vector SD Node to analyze
2040  * \param[in] CheckEven Check for an even merge (true) or an odd merge (false)
2041  * \param[in] ShuffleKind Identify the type of merge:
2042  * - 0 = big-endian merge with two different inputs;
2043  * - 1 = either-endian merge with two identical inputs;
2044  * - 2 = little-endian merge with two different inputs (inputs are swapped for
2045  * little-endian merges).
2046  * \param[in] DAG The current SelectionDAG
 * \return true iff this shuffle mask is suitable for a vmrgew or vmrgow
 * merge of the requested kind
2048  */
                               unsigned ShuffleKind, SelectionDAG &DAG) {
  if (DAG.getDataLayout().isLittleEndian()) {
    // LE: offsets are flipped relative to BE (see the comment block above:
    // offset 4 selects even elements on LE, 0 selects odd).
    unsigned indexOffset = CheckEven ? 4 : 0;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 2) // swapped
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  else {
    // BE: offset 0 selects even elements, 4 selects odd.
    unsigned indexOffset = CheckEven ? 0 : 4;
    if (ShuffleKind == 1) // Unary
      return isVMerge(N, indexOffset, 0);
    else if (ShuffleKind == 0) // Normal
      return isVMerge(N, indexOffset, 16);
    else
      return false;
  }
  // Unreachable: both branches above return on every path.
  return false;
}
2071 
2072 /// isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift
2073 /// amount, otherwise return -1.
2074 /// The ShuffleKind distinguishes between big-endian operations with two
2075 /// different inputs (0), either-endian operations with two identical inputs
2076 /// (1), and little-endian operations with two different inputs (2). For the
2077 /// latter, the input operands are swapped (see PPCInstrAltivec.td).
2078 int PPC::isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind,
2079  SelectionDAG &DAG) {
2080  if (N->getValueType(0) != MVT::v16i8)
2081  return -1;
2082 
2083  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2084 
2085  // Find the first non-undef value in the shuffle mask.
2086  unsigned i;
2087  for (i = 0; i != 16 && SVOp->getMaskElt(i) < 0; ++i)
2088  /*search*/;
2089 
2090  if (i == 16) return -1; // all undef.
2091 
2092  // Otherwise, check to see if the rest of the elements are consecutively
2093  // numbered from this value.
2094  unsigned ShiftAmt = SVOp->getMaskElt(i);
2095  if (ShiftAmt < i) return -1;
2096 
2097  ShiftAmt -= i;
2098  bool isLE = DAG.getDataLayout().isLittleEndian();
2099 
2100  if ((ShuffleKind == 0 && !isLE) || (ShuffleKind == 2 && isLE)) {
2101  // Check the rest of the elements to see if they are consecutive.
2102  for (++i; i != 16; ++i)
2103  if (!isConstantOrUndef(SVOp->getMaskElt(i), ShiftAmt+i))
2104  return -1;
2105  } else if (ShuffleKind == 1) {
2106  // Check the rest of the elements to see if they are consecutive.
2107  for (++i; i != 16; ++i)
2108  if (!isConstantOrUndef(SVOp->getMaskElt(i), (ShiftAmt+i) & 15))
2109  return -1;
2110  } else
2111  return -1;
2112 
2113  if (isLE)
2114  ShiftAmt = 16 - ShiftAmt;
2115 
2116  return ShiftAmt;
2117 }
2118 
/// isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a splat of a single element that is suitable for input to
/// one of the splat operations (VSPLTB/VSPLTH/VSPLTW/XXSPLTW/LXVDSX/etc.).
  assert(N->getValueType(0) == MVT::v16i8 && isPowerOf2_32(EltSize) &&
         EltSize <= 8 && "Can only handle 1,2,4,8 byte element sizes");

  // The consecutive indices need to specify an element, not part of two
  // different elements. So abandon ship early if this isn't the case.
  if (N->getMaskElt(0) % EltSize != 0)
    return false;

  // This is a splat operation if each element of the permute is the same, and
  // if the value doesn't reference the second vector.
  unsigned ElementBase = N->getMaskElt(0);

  // FIXME: Handle UNDEF elements too!
  if (ElementBase >= 16)
    return false;

  // Check that the indices are consecutive, in the case of a multi-byte element
  // splatted with a v16i8 mask.
  for (unsigned i = 1; i != EltSize; ++i)
    if (N->getMaskElt(i) < 0 || N->getMaskElt(i) != (int)(i+ElementBase))
      return false;

  // Every subsequent EltSize-byte group must repeat the first group exactly;
  // groups whose first byte is undef are accepted without further checks.
  for (unsigned i = EltSize, e = 16; i != e; i += EltSize) {
    if (N->getMaskElt(i) < 0) continue;
    for (unsigned j = 0; j != EltSize; ++j)
      if (N->getMaskElt(i+j) != N->getMaskElt(j))
        return false;
  }
  return true;
}
2153 
/// Check that the mask is shuffling N byte elements. Within each N byte
/// element of the mask, the indices could be either in increasing or
/// decreasing order as long as they are consecutive.
/// \param[in] N the shuffle vector SD Node to analyze
/// \param[in] Width the element width in bytes, could be 2/4/8/16 (HalfWord/
/// Word/DoubleWord/QuadWord).
/// \param[in] StepLen the delta indices number among the N byte element, if
/// the mask is in increasing/decreasing order then it is 1/-1.
/// \return true iff the mask is shuffling N byte elements.
                                   int StepLen) {
  assert((Width == 2 || Width == 4 || Width == 8 || Width == 16) &&
         "Unexpected element width.");
  // NOTE(review): this message looks copy-pasted; it should probably say
  // "Unexpected step length." — confirm before changing the string.
  assert((StepLen == 1 || StepLen == -1) && "Unexpected element width.");

  unsigned NumOfElem = 16 / Width;
  unsigned MaskVal[16]; // Width is never greater than 16
  for (unsigned i = 0; i < NumOfElem; ++i) {
    // The first byte of each element must begin on an element boundary
    // (increasing order) or end on one (decreasing order).
    MaskVal[0] = N->getMaskElt(i * Width);
    if ((StepLen == 1) && (MaskVal[0] % Width)) {
      return false;
    } else if ((StepLen == -1) && ((MaskVal[0] + 1) % Width)) {
      return false;
    }

    // The remaining bytes of the element must step by exactly StepLen.
    for (unsigned int j = 1; j < Width; ++j) {
      MaskVal[j] = N->getMaskElt(i * Width + j);
      if (MaskVal[j] != MaskVal[j-1] + StepLen) {
        return false;
      }
    }
  }

  return true;
}
2189 
/// Match a shuffle that a single XXINSERTW can implement: three of the four
/// result words come in natural order from one input, and the remaining word
/// comes from the other input (or, when operand 1 is undef, from another word
/// of the same input). On success, sets \p ShiftElts, \p InsertAtByte and
/// \p Swap for the caller.
bool PPC::isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts,
                          unsigned &InsertAtByte, bool &Swap, bool IsLE) {
  // Each word of the mask must consist of four consecutive byte indices.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;
  // Value assigned to ShiftElts, selected by the out-of-place word (mod 4).
  unsigned LittleEndianShifts[] = { 2, 1, 0, 3 };
  unsigned BigEndianShifts[] = { 3, 0, 1, 2 };

  // Below, let H and L be arbitrary elements of the shuffle mask
  // where H is in the range [4,7] and L is in the range [0,3].
  // H, 1, 2, 3 or L, 5, 6, 7
  if ((M0 > 3 && M1 == 1 && M2 == 2 && M3 == 3) ||
      (M0 < 4 && M1 == 5 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M0 & 0x3] : BigEndianShifts[M0 & 0x3];
    InsertAtByte = IsLE ? 12 : 0;
    Swap = M0 < 4;
    return true;
  }
  // 0, H, 2, 3 or 4, L, 6, 7
  if ((M1 > 3 && M0 == 0 && M2 == 2 && M3 == 3) ||
      (M1 < 4 && M0 == 4 && M2 == 6 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M1 & 0x3] : BigEndianShifts[M1 & 0x3];
    InsertAtByte = IsLE ? 8 : 4;
    Swap = M1 < 4;
    return true;
  }
  // 0, 1, H, 3 or 4, 5, L, 7
  if ((M2 > 3 && M0 == 0 && M1 == 1 && M3 == 3) ||
      (M2 < 4 && M0 == 4 && M1 == 5 && M3 == 7)) {
    ShiftElts = IsLE ? LittleEndianShifts[M2 & 0x3] : BigEndianShifts[M2 & 0x3];
    InsertAtByte = IsLE ? 4 : 8;
    Swap = M2 < 4;
    return true;
  }
  // 0, 1, 2, H or 4, 5, 6, L
  if ((M3 > 3 && M0 == 0 && M1 == 1 && M2 == 2) ||
      (M3 < 4 && M0 == 4 && M1 == 5 && M2 == 6)) {
    ShiftElts = IsLE ? LittleEndianShifts[M3 & 0x3] : BigEndianShifts[M3 & 0x3];
    InsertAtByte = IsLE ? 0 : 12;
    Swap = M3 < 4;
    return true;
  }

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    ShiftElts = 0;
    Swap = true;
    unsigned XXINSERTWSrcElem = IsLE ? 2 : 1;
    if (M0 == XXINSERTWSrcElem && M1 == 1 && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 12 : 0;
      return true;
    }
    if (M0 == 0 && M1 == XXINSERTWSrcElem && M2 == 2 && M3 == 3) {
      InsertAtByte = IsLE ? 8 : 4;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == XXINSERTWSrcElem && M3 == 3) {
      InsertAtByte = IsLE ? 4 : 8;
      return true;
    }
    if (M0 == 0 && M1 == 1 && M2 == 2 && M3 == XXINSERTWSrcElem) {
      InsertAtByte = IsLE ? 0 : 12;
      return true;
    }
  }

  return false;
}
2264 
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");
  // Ensure each byte index of the word is consecutive.
  if (!isNByteElemShuffleMask(N, 4, 1))
    return false;

  // Now we look at mask elements 0,4,8,12, which are the beginning of words.
  unsigned M0 = N->getMaskElt(0) / 4;
  unsigned M1 = N->getMaskElt(4) / 4;
  unsigned M2 = N->getMaskElt(8) / 4;
  unsigned M3 = N->getMaskElt(12) / 4;

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    assert(M0 < 4 && "Indexing into an undef vector?");
    // Words must rotate consecutively within the single (4-word) input.
    if (M1 != (M0 + 1) % 4 || M2 != (M1 + 1) % 4 || M3 != (M2 + 1) % 4)
      return false;

    ShiftElts = IsLE ? (4 - M0) % 4 : M0;
    Swap = false;
    return true;
  }

  // Ensure each word index of the ShuffleVector Mask is consecutive.
  // (Two inputs concatenate to 8 words, hence modulo 8.)
  if (M1 != (M0 + 1) % 8 || M2 != (M1 + 1) % 8 || M3 != (M2 + 1) % 8)
    return false;

  if (IsLE) {
    if (M0 == 0 || M0 == 7 || M0 == 6 || M0 == 5) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 3 left elements of the second vector
      // (or if there is no shift to be done at all).
      Swap = false;
      ShiftElts = (8 - M0) % 8;
    } else if (M0 == 4 || M0 == 3 || M0 == 2 || M0 == 1) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 3 left elements of the first vector
      // (or if we're shifting by 4 - thereby simply swapping the vectors).
      Swap = true;
      ShiftElts = (4 - M0) % 4;
    }

    return true;
  } else { // BE
    if (M0 == 0 || M0 == 1 || M0 == 2 || M0 == 3) {
      // Input vectors don't need to be swapped if the leading element
      // of the result is one of the 4 elements of the first vector.
      Swap = false;
      ShiftElts = M0;
    } else if (M0 == 4 || M0 == 5 || M0 == 6 || M0 == 7) {
      // Input vectors need to be swapped if the leading element
      // of the result is one of the 4 elements of the right vector.
      Swap = true;
      ShiftElts = M0 - 4;
    }

    return true;
  }
}
2326 
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Bytes within each Width-byte element must be strictly decreasing (a
  // byte reverse within the element).
  if (!isNByteElemShuffleMask(N, Width, -1))
    return false;

  // Element i of the result must be the byte-reverse of element i of the
  // input, i.e. each group starts at its own last byte.
  for (int i = 0; i < 16; i += Width)
    if (N->getMaskElt(i) != i + Width - 1)
      return false;

  return true;
}
2339 
2341  return isXXBRShuffleMaskHelper(N, 2);
2342 }
2343 
2345  return isXXBRShuffleMaskHelper(N, 4);
2346 }
2347 
2349  return isXXBRShuffleMaskHelper(N, 8);
2350 }
2351 
2353  return isXXBRShuffleMaskHelper(N, 16);
2354 }
2355 
/// Can node \p N be lowered to an XXPERMDI instruction? If so, set \p Swap
/// if the inputs to the instruction should be swapped and set \p DM to the
/// value for the immediate.
/// Specifically, set \p Swap to true only if \p N can be lowered to XXPERMDI
/// AND element 0 of the result comes from the first input (LE) or second input
/// (BE). Set \p DM to the calculated result (0-3) only if \p N can be lowered.
/// \return true iff the given mask of shuffle node \p N is a XXPERMDI shuffle
/// mask.
                               bool &Swap, bool IsLE) {
  assert(N->getValueType(0) == MVT::v16i8 && "Shuffle vector expects v16i8");

  // Ensure each byte index of the double word is consecutive.
  if (!isNByteElemShuffleMask(N, 8, 1))
    return false;

  // Doubleword indices of the two result halves (0-3 across both inputs).
  unsigned M0 = N->getMaskElt(0) / 8;
  unsigned M1 = N->getMaskElt(8) / 8;
  assert(((M0 | M1) < 4) && "A mask element out of bounds?");

  // If both vector operands for the shuffle are the same vector, the mask will
  // contain only elements from the first one and the second one will be undef.
  if (N->getOperand(1).isUndef()) {
    if ((M0 | M1) < 2) {
      DM = IsLE ? (((~M1) & 1) << 1) + ((~M0) & 1) : (M0 << 1) + (M1 & 1);
      Swap = false;
      return true;
    } else
      return false;
  }

  if (IsLE) {
    if (M0 > 1 && M1 < 2) {
      Swap = false;
    } else if (M0 < 2 && M1 > 1) {
      // Halves come from the swapped operands; renumber them accordingly.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (((~M1) & 1) << 1) + ((~M0) & 1);
    return true;
  } else { // BE
    if (M0 < 2 && M1 > 1) {
      Swap = false;
    } else if (M0 > 1 && M1 < 2) {
      // Halves come from the swapped operands; renumber them accordingly.
      M0 = (M0 + 2) % 4;
      M1 = (M1 + 2) % 4;
      Swap = true;
    } else
      return false;

    // Note: if control flow comes here that means Swap is already set above
    DM = (M0 << 1) + (M1 & 1);
    return true;
  }
}
2415 
2416 
2417 /// getSplatIdxForPPCMnemonics - Return the splat index as a value that is
2418 /// appropriate for PPC mnemonics (which have a big endian bias - namely
2419 /// elements are counted from the left of the vector register).
2420 unsigned PPC::getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize,
2421  SelectionDAG &DAG) {
2422  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(N);
2423  assert(isSplatShuffleMask(SVOp, EltSize));
2424  if (DAG.getDataLayout().isLittleEndian())
2425  return (16 / EltSize) - 1 - (SVOp->getMaskElt(0) / EltSize);
2426  else
2427  return SVOp->getMaskElt(0) / EltSize;
2428 }
2429 
/// get_VSPLTI_elt - If this is a build_vector of constants which can be formed
/// by using a vspltis[bhw] instruction of the specified element size, return
/// the constant being splatted. The ByteSize field indicates the number of
/// bytes of each element [124] -> [bhw].
SDValue PPC::get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG) {
  SDValue OpVal;

  // If ByteSize of the splat is bigger than the element size of the
  // build_vector, then we have a case where we are checking for a splat where
  // multiple elements of the buildvector are folded together into a single
  // logical element of the splat (e.g. "vsplish 1" to splat {0,1}*8).
  unsigned EltSize = 16/N->getNumOperands();
  if (EltSize < ByteSize) {
    unsigned Multiple = ByteSize/EltSize; // Number of BV entries per spltval.
    SDValue UniquedVals[4];
    assert(Multiple > 1 && Multiple <= 4 && "How can this happen?");

    // See if all of the elements in the buildvector agree across.
    // Multiple is a power of two here, so i&(Multiple-1) == i % Multiple.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
      if (N->getOperand(i).isUndef()) continue;
      // If the element isn't a constant, bail fully out.
      if (!isa<ConstantSDNode>(N->getOperand(i))) return SDValue();

      if (!UniquedVals[i&(Multiple-1)].getNode())
        UniquedVals[i&(Multiple-1)] = N->getOperand(i);
      else if (UniquedVals[i&(Multiple-1)] != N->getOperand(i))
        return SDValue(); // no match.
    }

    // Okay, if we reached this point, UniquedVals[0..Multiple-1] contains
    // either constant or undef values that are identical for each chunk. See
    // if these chunks can form into a larger vspltis*.

    // Check to see if all of the leading entries are either 0 or -1. If
    // neither, then this won't fit into the immediate field.
    bool LeadingZero = true;
    bool LeadingOnes = true;
    for (unsigned i = 0; i != Multiple-1; ++i) {
      if (!UniquedVals[i].getNode()) continue; // Must have been undefs.

      LeadingZero &= isNullConstant(UniquedVals[i]);
      LeadingOnes &= isAllOnesConstant(UniquedVals[i]);
    }
    // Finally, check the least significant entry.
    if (LeadingZero) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(0, SDLoc(N), MVT::i32); // 0,0,0,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getZExtValue();
      if (Val < 16) // 0,0,0,4 -> vspltisw(4)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }
    if (LeadingOnes) {
      if (!UniquedVals[Multiple-1].getNode())
        return DAG.getTargetConstant(~0U, SDLoc(N), MVT::i32); // -1,-1,-1,undef
      int Val = cast<ConstantSDNode>(UniquedVals[Multiple-1])->getSExtValue();
      if (Val >= -16) // -1,-1,-1,-2 -> vspltisw(-2)
        return DAG.getTargetConstant(Val, SDLoc(N), MVT::i32);
    }

    return SDValue();
  }

  // Check to see if this buildvec has a single non-undef value in its elements.
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    if (N->getOperand(i).isUndef()) continue;
    if (!OpVal.getNode())
      OpVal = N->getOperand(i);
    else if (OpVal != N->getOperand(i))
      return SDValue();
  }

  if (!OpVal.getNode()) return SDValue(); // All UNDEF: use implicit def.

  // Extract the raw bit pattern of the splatted scalar (int or f32).
  unsigned ValSizeInBytes = EltSize;
  uint64_t Value = 0;
  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal)) {
    Value = CN->getZExtValue();
  } else if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(OpVal)) {
    assert(CN->getValueType(0) == MVT::f32 && "Only one legal FP vector type!");
    Value = FloatToBits(CN->getValueAPF().convertToFloat());
  }

  // If the splat value is larger than the element value, then we can never do
  // this splat. The only case that we could fit the replicated bits into our
  // immediate field for would be zero, and we prefer to use vxor for it.
  if (ValSizeInBytes < ByteSize) return SDValue();

  // If the element value is larger than the splat value, check if it consists
  // of a repeated bit pattern of size ByteSize.
  if (!APInt(ValSizeInBytes * 8, Value).isSplat(ByteSize * 8))
    return SDValue();

  // Properly sign extend the value.
  int MaskVal = SignExtend32(Value, ByteSize * 8);

  // If this is zero, don't match, zero matches ISD::isBuildVectorAllZeros.
  if (MaskVal == 0) return SDValue();

  // Finally, if this value fits in a 5 bit sext field, return it
  if (SignExtend32<5>(MaskVal) == MaskVal)
    return DAG.getTargetConstant(MaskVal, SDLoc(N), MVT::i32);
  return SDValue();
}
2533 
2534 //===----------------------------------------------------------------------===//
2535 // Addressing Mode Selection
2536 //===----------------------------------------------------------------------===//
2537 
2538 /// isIntS16Immediate - This method tests to see if the node is either a 32-bit
2539 /// or 64-bit immediate, and if the value can be accurately represented as a
2540 /// sign extension from a 16-bit value. If so, this returns true and the
2541 /// immediate.
2542 bool llvm::isIntS16Immediate(SDNode *N, int16_t &Imm) {
2543  if (!isa<ConstantSDNode>(N))
2544  return false;
2545 
2546  Imm = (int16_t)cast<ConstantSDNode>(N)->getZExtValue();
2547  if (N->getValueType(0) == MVT::i32)
2548  return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
2549  else
2550  return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2551 }
2552 bool llvm::isIntS16Immediate(SDValue Op, int16_t &Imm) {
2553  return isIntS16Immediate(Op.getNode(), Imm);
2554 }
2555 
2556 /// Used when computing address flags for selecting loads and stores.
2557 /// If we have an OR, check if the LHS and RHS are provably disjoint.
2558 /// An OR of two provably disjoint values is equivalent to an ADD.
2559 /// Most PPC load/store instructions compute the effective address as a sum,
2560 /// so doing this conversion is useful.
2561 static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N) {
2562  if (N.getOpcode() != ISD::OR)
2563  return false;
2564  KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
2565  if (!LHSKnown.Zero.getBoolValue())
2566  return false;
2567  KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
2568  return (~(LHSKnown.Zero | RHSKnown.Zero) == 0);
2569 }
2570 
2571 /// SelectAddressEVXRegReg - Given the specified address, check to see if it can
2572 /// be represented as an indexed [r+r] operation.
                                               SDValue &Index,
                                               SelectionDAG &DAG) const {
  // N is expected to be an ADD; the caller (SelectAddressRegReg) checks the
  // opcode before calling us, so splitting into operand(0)/operand(1) is safe.
  for (SDNode *U : N->uses()) {
    if (MemSDNode *Memop = dyn_cast<MemSDNode>(U)) {
      // Any f64 memory user is an SPE load/store (see the caller's comment:
      // SPE load/store can only handle 8-bit offsets), so force the [r+r]
      // form by returning the two addends of the ADD.
      if (Memop->getMemoryVT() == MVT::f64) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }
  // No f64 memory user: leave the address selection to the normal paths.
  return false;
}
2587 
2588 /// isIntS34Immediate - This method tests if value of node given can be
2589 /// accurately represented as a sign extension from a 34-bit value. If so,
2590 /// this returns true and the immediate.
2591 bool llvm::isIntS34Immediate(SDNode *N, int64_t &Imm) {
2592  if (!isa<ConstantSDNode>(N))
2593  return false;
2594 
2595  Imm = (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
2596  return isInt<34>(Imm);
2597 }
2598 bool llvm::isIntS34Immediate(SDValue Op, int64_t &Imm) {
2599  return isIntS34Immediate(Op.getNode(), Imm);
2600 }
2601 
2602 /// SelectAddressRegReg - Given the specified addressed, check to see if it
2603 /// can be represented as an indexed [r+r] operation. Returns false if it
2604 /// can be more efficiently represented as [r+imm]. If \p EncodingAlignment is
2605 /// non-zero and N can be represented by a base register plus a signed 16-bit
2606 /// displacement, make a more precise judgement by checking (displacement % \p
2607 /// EncodingAlignment).
                                            MaybeAlign EncodingAlignment) const {
  // If we have a PC Relative target flag don't select as [reg+reg]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  int16_t Imm = 0;
  if (N.getOpcode() == ISD::ADD) {
    // Is there any SPE load/store (f64), which can't handle 16bit offset?
    // SPE load/store can only handle 8-bit offsets.
    if (hasSPE() && SelectAddressEVXRegReg(N, Base, Index, DAG))
      return true;
    // Returning false here means "prefer the [r+imm] form": the displacement
    // fits in a signed 16-bit field (and satisfies the encoding alignment).
    if (isIntS16Immediate(N.getOperand(1), Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      return false; // r+i
    if (N.getOperand(1).getOpcode() == PPCISD::Lo)
      return false; // r+i

    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  } else if (N.getOpcode() == ISD::OR) {
    if (isIntS16Immediate(N.getOperand(1), Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm)))
      return false; // r+i can fold it if we can.

    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are provably
    // disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

    if (LHSKnown.Zero.getBoolValue()) {
      KnownBits RHSKnown = DAG.computeKnownBits(N.getOperand(1));
      // If all of the bits are known zero on the LHS or RHS, the add won't
      // carry.
      if (~(LHSKnown.Zero | RHSKnown.Zero) == 0) {
        Base = N.getOperand(0);
        Index = N.getOperand(1);
        return true;
      }
    }
  }

  // Not an ADD/OR we can split into two registers.
  return false;
}
2655 
2656 // If we happen to be doing an i64 load or store into a stack slot that has
2657 // less than a 4-byte alignment, then the frame-index elimination may need to
2658 // use an indexed load or store instruction (because the offset may not be a
2659 // multiple of 4). The extra register needed to hold the offset comes from the
2660 // register scavenger, and it is possible that the scavenger will need to use
2661 // an emergency spill slot. As a result, we need to make sure that a spill slot
2662 // is allocated when doing an i64 load/store into a less-than-4-byte-aligned
2663 // stack slot.
2664 static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT) {
2665  // FIXME: This does not handle the LWA case.
2666  if (VT != MVT::i64)
2667  return;
2668 
2669  // NOTE: We'll exclude negative FIs here, which come from argument
2670  // lowering, because there are no known test cases triggering this problem
2671  // using packed structures (or similar). We can remove this exclusion if
2672  // we find such a test case. The reason why this is so test-case driven is
2673  // because this entire 'fixup' is only to prevent crashes (from the
2674  // register scavenger) on not-really-valid inputs. For example, if we have:
2675  // %a = alloca i1
2676  // %b = bitcast i1* %a to i64*
2677  // store i64* a, i64 b
2678  // then the store should really be marked as 'align 1', but is not. If it
2679  // were marked as 'align 1' then the indexed form would have been
2680  // instruction-selected initially, and the problem this 'fixup' is preventing
2681  // won't happen regardless.
2682  if (FrameIdx < 0)
2683  return;
2684 
2685  MachineFunction &MF = DAG.getMachineFunction();
2686  MachineFrameInfo &MFI = MF.getFrameInfo();
2687 
2688  if (MFI.getObjectAlign(FrameIdx) >= Align(4))
2689  return;
2690 
2691  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
2692  FuncInfo->setHasNonRISpills();
2693 }
2694 
2695 /// Returns true if the address N can be represented by a base register plus
2696 /// a signed 16-bit displacement [r+imm], and if it is not better
2697 /// represented as reg+reg. If \p EncodingAlignment is non-zero, only accept
2698 /// displacements that are multiples of that value.
    SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG,
    MaybeAlign EncodingAlignment) const {
  // FIXME dl should come from parent load or store, not from address
  SDLoc dl(N);

  // If we have a PC Relative target flag don't select as [reg+imm]. It will be
  // a [pc+imm].
  if (SelectAddressPCRel(N, Base))
    return false;

  // If this can be more profitably realized as r+r, fail.
  if (SelectAddressRegReg(N, Disp, Base, DAG, EncodingAlignment))
    return false;

  if (N.getOpcode() == ISD::ADD) {
    int16_t imm = 0;
    // ADD of a base and a 16-bit signed displacement (honoring the required
    // encoding alignment, if any).
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
      Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
        // See fixupFuncForFI: under-aligned i64 frame slots may need an
        // emergency spill slot for the scavenger.
        fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
      } else {
        Base = N.getOperand(0);
      }
      return true; // [r+i]
    } else if (N.getOperand(1).getOpcode() == PPCISD::Lo) {
      // Match LOAD (ADD (X, Lo(G))).
      assert(!cast<ConstantSDNode>(N.getOperand(1).getOperand(1))->getZExtValue()
             && "Cannot handle constant offsets yet!");
      Disp = N.getOperand(1).getOperand(0); // The global address.
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      return true; // [&g+r]
    }
  } else if (N.getOpcode() == ISD::OR) {
    int16_t imm = 0;
    if (isIntS16Immediate(N.getOperand(1), imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, imm))) {
      // If this is an or of disjoint bitfields, we can codegen this as an add
      // (for better address arithmetic) if the LHS and RHS of the OR are
      // provably disjoint.
      KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));

      if ((LHSKnown.Zero.getZExtValue()|~(uint64_t)imm) == ~0ULL) {
        // If all of the bits are known zero on the LHS or RHS, the add won't
        // carry.
        if (FrameIndexSDNode *FI =
              dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        } else {
          Base = N.getOperand(0);
        }
        Disp = DAG.getTargetConstant(imm, dl, N.getValueType());
        return true;
      }
    }
  } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // Loading from a constant address.

    // If this address fits entirely in a 16-bit sext immediate field, codegen
    // this as "d, 0"
    int16_t Imm;
    if (isIntS16Immediate(CN, Imm) &&
        (!EncodingAlignment || isAligned(*EncodingAlignment, Imm))) {
      Disp = DAG.getTargetConstant(Imm, dl, CN->getValueType(0));
      // ZERO/ZERO8 as the base means "register 0", i.e. a literal zero base.
      Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                             CN->getValueType(0));
      return true;
    }

    // Handle 32-bit sext immediates with LIS + addr mode.
    if ((CN->getValueType(0) == MVT::i32 ||
         (int64_t)CN->getZExtValue() == (int)CN->getZExtValue()) &&
        (!EncodingAlignment ||
         isAligned(*EncodingAlignment, CN->getZExtValue()))) {
      int Addr = (int)CN->getZExtValue();

      // Otherwise, break this down into an LIS + disp.
      Disp = DAG.getTargetConstant((short)Addr, dl, MVT::i32);

      // High half, adjusted to compensate for the sign-extension of the low
      // 16 bits that the displacement will undergo.
      Base = DAG.getTargetConstant((Addr - (signed short)Addr) >> 16, dl,
                                   MVT::i32);
      unsigned Opc = CN->getValueType(0) == MVT::i32 ? PPC::LIS : PPC::LIS8;
      Base = SDValue(DAG.getMachineNode(Opc, dl, CN->getValueType(0), Base), 0);
      return true;
    }
  }

  // Fallback: any address is representable as [r+0].
  Disp = DAG.getTargetConstant(0, dl, getPointerTy(DAG.getDataLayout()));
  if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
    Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
  } else
    Base = N;
  return true; // [r+0]
}
2801 
2802 /// Similar to the 16-bit case but for instructions that take a 34-bit
2803 /// displacement field (prefixed loads/stores).
                                              SDValue &Base,
                                              SelectionDAG &DAG) const {
  // Only on 64-bit targets.
  if (N.getValueType() != MVT::i64)
    return false;

  SDLoc dl(N);
  int64_t Imm = 0;

  if (N.getOpcode() == ISD::ADD) {
    // ADD of a base and a signed 34-bit displacement (prefixed form).
    if (!isIntS34Immediate(N.getOperand(1), Imm))
      return false;
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    else
      Base = N.getOperand(0);
    return true;
  }

  if (N.getOpcode() == ISD::OR) {
    if (!isIntS34Immediate(N.getOperand(1), Imm))
      return false;
    // If this is an or of disjoint bitfields, we can codegen this as an add
    // (for better address arithmetic) if the LHS and RHS of the OR are
    // provably disjoint.
    KnownBits LHSKnown = DAG.computeKnownBits(N.getOperand(0));
    if ((LHSKnown.Zero.getZExtValue() | ~(uint64_t)Imm) != ~0ULL)
      return false;
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
    else
      Base = N.getOperand(0);
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
    return true;
  }

  if (isIntS34Immediate(N, Imm)) { // If the address is a 34-bit const.
    Disp = DAG.getTargetConstant(Imm, dl, N.getValueType());
    // ZERO8 base register encodes a literal zero base.
    Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
    return true;
  }

  return false;
}
2850 
2851 /// SelectAddressRegRegOnly - Given the specified addressed, force it to be
2852 /// represented as an indexed [r+r] operation.
                                                SDValue &Index,
                                                SelectionDAG &DAG) const {
  // Check to see if we can easily represent this as an [r+r] address. This
  // will fail if it thinks that the address is more profitably represented as
  // reg+imm, e.g. where imm = 0.
  if (SelectAddressRegReg(N, Base, Index, DAG))
    return true;

  // If the address is the result of an add, we will utilize the fact that the
  // address calculation includes an implicit add. However, we can reduce
  // register pressure if we do not materialize a constant just for use as the
  // index register. We only get rid of the add if it is not an add of a
  // value and a 16-bit signed constant and both have a single use.
  int16_t imm = 0;
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), imm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Base = N.getOperand(0);
    Index = N.getOperand(1);
    return true;
  }

  // Otherwise, do it the hard way, using R0 as the base register.
  // (ZERO/ZERO8 encodes a literal zero base, so the whole address is the
  // index.) Unlike the other Select* routines, this one always succeeds.
  Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Index = N;
  return true;
}
2882 
2883 template <typename Ty> static bool isValidPCRelNode(SDValue N) {
2884  Ty *PCRelCand = dyn_cast<Ty>(N);
2885  return PCRelCand && (PCRelCand->getTargetFlags() & PPCII::MO_PCREL_FLAG);
2886 }
2887 
2888 /// Returns true if this address is a PC Relative address.
2889 /// PC Relative addresses are marked with the flag PPCII::MO_PCREL_FLAG
2890 /// or if the node opcode is PPCISD::MAT_PCREL_ADDR.
  // This is a materialize PC Relative node. Always select this as PC Relative.
  // NOTE: Base is set unconditionally; it is only meaningful to the caller
  // when we return true.
  Base = N;
  if (N.getOpcode() == PPCISD::MAT_PCREL_ADDR)
    return true;
  // Otherwise accept any constant-pool / global / jump-table / block-address
  // node that carries the MO_PCREL_FLAG target flag.
  if (isValidPCRelNode<ConstantPoolSDNode>(N) ||
      isValidPCRelNode<GlobalAddressSDNode>(N) ||
      isValidPCRelNode<JumpTableSDNode>(N) ||
      isValidPCRelNode<BlockAddressSDNode>(N))
    return true;
  return false;
}
2903 
2904 /// Returns true if we should use a direct load into vector instruction
2905 /// (such as lxsd or lfd), instead of a load into gpr + direct move sequence.

  // If there are any other uses other than scalar to vector, then we should
  // keep it as a scalar load -> direct move pattern to prevent multiple
  // loads.
  LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
  if (!LD)
    return false;

  EVT MemVT = LD->getMemoryVT();
  if (!MemVT.isSimple())
    return false;
  // Which scalar widths can be loaded directly into a vector register depends
  // on the subtarget: i64 always, i32 needs P8 vector support, i8/i16 need
  // P9 vector support.
  switch(MemVT.getSimpleVT().SimpleTy) {
  case MVT::i64:
    break;
  case MVT::i32:
    if (!ST.hasP8Vector())
      return false;
    break;
  case MVT::i16:
  case MVT::i8:
    if (!ST.hasP9Vector())
      return false;
    break;
  default:
    return false;
  }

  // The loaded value itself must have exactly one use ...
  SDValue LoadedVal(N, 0);
  if (!LoadedVal.hasOneUse())
    return false;

  // ... and every user of the value (result 0; the chain result is exempt)
  // must be a scalar_to_vector-style node for the direct load to pay off.
  for (SDNode::use_iterator UI = LD->use_begin(), UE = LD->use_end();
       UI != UE; ++UI)
    if (UI.getUse().get().getResNo() == 0 &&
        UI->getOpcode() != ISD::SCALAR_TO_VECTOR &&
        UI->getOpcode() != PPCISD::SCALAR_TO_VECTOR_PERMUTED)
      return false;

  return true;
}
2947 
2948 /// getPreIndexedAddressParts - returns true by value, base pointer and
2949 /// offset pointer and addressing mode by reference if the node's address
2950 /// can be legally represented as pre-indexed load / store address.
                                                  SDValue &Offset,
                                                  ISD::MemIndexedMode &AM,
                                                  SelectionDAG &DAG) const {
  if (DisablePPCPreinc) return false;

  // Pre-indexed forms only exist for plain loads and stores; extract the
  // pointer, memory type, and alignment from whichever one this is.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  unsigned Alignment;
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    Ptr = LD->getBasePtr();
    VT = LD->getMemoryVT();
    Alignment = LD->getAlignment();
  } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) {
    Ptr = ST->getBasePtr();
    VT = ST->getMemoryVT();
    Alignment = ST->getAlignment();
    isLoad = false;
  } else
    return false;

  // Do not generate pre-inc forms for specific loads that feed scalar_to_vector
  // instructions because we can fold these into a more efficient instruction
  // instead, (such as LXSD).
  if (isLoad && usePartialVectorLoads(N, Subtarget)) {
    return false;
  }

  // PowerPC doesn't have preinc load/store instructions for vectors
  if (VT.isVector())
    return false;

  if (SelectAddressRegReg(Ptr, Base, Offset, DAG)) {
    // Common code will reject creating a pre-inc form if the base pointer
    // is a frame index, or if N is a store and the base pointer is either
    // the same as or a predecessor of the value being stored. Check for
    // those situations here, and try with swapped Base/Offset instead.
    bool Swap = false;

    if (isa<FrameIndexSDNode>(Base) || isa<RegisterSDNode>(Base))
      Swap = true;
    else if (!isLoad) {
      SDValue Val = cast<StoreSDNode>(N)->getValue();
      if (Val == Base || Base.getNode()->isPredecessorOf(Val.getNode()))
        Swap = true;
    }

    if (Swap)
      std::swap(Base, Offset);

    AM = ISD::PRE_INC;
    return true;
  }

  // LDU/STU can only handle immediates that are a multiple of 4.
  if (VT != MVT::i64) {
    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, None))
      return false;
  } else {
    // LDU/STU need an address with at least 4-byte alignment.
    if (Alignment < 4)
      return false;

    if (!SelectAddressRegImm(Ptr, Offset, Base, DAG, Align(4)))
      return false;
  }

  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    // PPC64 doesn't have lwau, but it does have lwaux. Reject preinc load of
    // sext i32 to i64 when addr mode is r+i.
    if (LD->getValueType(0) == MVT::i64 && LD->getMemoryVT() == MVT::i32 &&
        LD->getExtensionType() == ISD::SEXTLOAD &&
        isa<ConstantSDNode>(Offset))
      return false;
  }

  AM = ISD::PRE_INC;
  return true;
}
3031 
3032 //===----------------------------------------------------------------------===//
3033 // LowerOperation implementation
3034 //===----------------------------------------------------------------------===//
3035 
3036 /// Return true if we should reference labels using a PICBase, set the HiOpFlags
3037 /// and LoOpFlags to the target MO flags.
3038 static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget,
3039  unsigned &HiOpFlags, unsigned &LoOpFlags,
3040  const GlobalValue *GV = nullptr) {
3041  HiOpFlags = PPCII::MO_HA;
3042  LoOpFlags = PPCII::MO_LO;
3043 
3044  // Don't use the pic base if not in PIC relocation model.
3045  if (IsPIC) {
3046  HiOpFlags |= PPCII::MO_PIC_FLAG;
3047  LoOpFlags |= PPCII::MO_PIC_FLAG;
3048  }
3049 }
3050 
3051 static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC,
3052  SelectionDAG &DAG) {
3053  SDLoc DL(HiPart);
3054  EVT PtrVT = HiPart.getValueType();
3055  SDValue Zero = DAG.getConstant(0, DL, PtrVT);
3056 
3057  SDValue Hi = DAG.getNode(PPCISD::Hi, DL, PtrVT, HiPart, Zero);
3058  SDValue Lo = DAG.getNode(PPCISD::Lo, DL, PtrVT, LoPart, Zero);
3059 
3060  // With PIC, the first instruction is actually "GR+hi(&G)".
3061  if (isPIC)
3062  Hi = DAG.getNode(ISD::ADD, DL, PtrVT,
3063  DAG.getNode(PPCISD::GlobalBaseReg, DL, PtrVT), Hi);
3064 
3065  // Generate non-pic code that has direct accesses to the constant pool.
3066  // The address of the global is just (hi(&g)+lo(&g)).
3067  return DAG.getNode(ISD::ADD, DL, PtrVT, Hi, Lo);
3068 }
3069 
  // Mark this function as using the TOC base pointer in its PPC
  // function-info.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setUsesTOCBasePtr();
}
3074 
static void setUsesTOCBasePtr(SelectionDAG &DAG) {
  // Convenience overload for DAG-based callers. NOTE(review): the body line
  // was lost in extraction — presumably it forwards to
  // setUsesTOCBasePtr(DAG.getMachineFunction()); confirm against upstream.
}
3078 
SDValue PPCTargetLowering::getTOCEntry(SelectionDAG &DAG, const SDLoc &dl,
                                       SDValue GA) const {
  const bool Is64Bit = Subtarget.isPPC64();
  EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
  // TOC base register: X2 on 64-bit, R2 on 32-bit AIX, otherwise the
  // materialized global base register.
  SDValue Reg = Is64Bit ? DAG.getRegister(PPC::X2, VT)
                        : Subtarget.isAIXABI()
                              ? DAG.getRegister(PPC::R2, VT)
                              : DAG.getNode(PPCISD::GlobalBaseReg, dl, VT);
  SDValue Ops[] = { GA, Reg };
  // Build a TOC_ENTRY memory-intrinsic node for (GA, TOC base).
  // NOTE(review): trailing arguments of this call were lost in extraction.
  return DAG.getMemIntrinsicNode(
      PPCISD::TOC_ENTRY, dl, DAG.getVTList(VT, MVT::Other), Ops, VT,
}
3093 
3094 SDValue PPCTargetLowering::LowerConstantPool(SDValue Op,
3095  SelectionDAG &DAG) const {
3096  EVT PtrVT = Op.getValueType();
3097  ConstantPoolSDNode *CP = cast<ConstantPoolSDNode>(Op);
3098  const Constant *C = CP->getConstVal();
3099 
3100  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
3101  // The actual address of the GlobalValue is stored in the TOC.
3102  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3103  if (Subtarget.isUsingPCRelativeCalls()) {
3104  SDLoc DL(CP);
3105  EVT Ty = getPointerTy(DAG.getDataLayout());
3106  SDValue ConstPool = DAG.getTargetConstantPool(
3107  C, Ty, CP->getAlign(), CP->getOffset(), PPCII::MO_PCREL_FLAG);
3108  return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, ConstPool);
3109  }
3110  setUsesTOCBasePtr(DAG);
3111  SDValue GA = DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0);
3112  return getTOCEntry(DAG, SDLoc(CP), GA);
3113  }
3114 
3115  unsigned MOHiFlag, MOLoFlag;
3116  bool IsPIC = isPositionIndependent();
3117  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
3118 
3119  if (IsPIC && Subtarget.isSVR4ABI()) {
3120  SDValue GA =
3121  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), PPCII::MO_PIC_FLAG);
3122  return getTOCEntry(DAG, SDLoc(CP), GA);
3123  }
3124 
3125  SDValue CPIHi =
3126  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOHiFlag);
3127  SDValue CPILo =
3128  DAG.getTargetConstantPool(C, PtrVT, CP->getAlign(), 0, MOLoFlag);
3129  return LowerLabelRef(CPIHi, CPILo, IsPIC, DAG);
3130 }
3131 
3132 // For 64-bit PowerPC, prefer the more compact relative encodings.
3133 // This trades 32 bits per jump table entry for one or two instructions
3134 // on the jump site.
  // Prefer the relative entry kind when relative jump tables are supported
  // (see the comment above: trades 32 bits per entry for 1-2 instructions).
  // NOTE(review): the return statements were lost in extraction.
  if (isJumpTableRelative())

}
3141 
    return false;
  // 64-bit and AIX targets use relative jump tables.
  if (Subtarget.isPPC64() || Subtarget.isAIXABI())
    return true;
}
3149 
                                                    SelectionDAG &DAG) const {
  // 32-bit and AIX targets use the generic relocation base.
  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);

  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Small:
  case CodeModel::Medium:
    return TargetLowering::getPICJumpTableRelocBase(Table, DAG);
  default:
    // Large/kernel code models: base relative jumps off the global base reg.
    return DAG.getNode(PPCISD::GlobalBaseReg, SDLoc(),
                       getPointerTy(DAG.getDataLayout()));
  }
}
3164 
const MCExpr *
                                                 unsigned JTI,
                                                 MCContext &Ctx) const {
  // 32-bit and AIX targets use the generic relocation base expression.
  if (!Subtarget.isPPC64() || Subtarget.isAIXABI())
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);

  switch (getTargetMachine().getCodeModel()) {
  case CodeModel::Small:
  case CodeModel::Medium:
    return TargetLowering::getPICJumpTableRelocBaseExpr(MF, JTI, Ctx);
  default:
    // Large/kernel code models: entries are relative to the PIC base symbol.
    return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
  }
}
3180 
SDValue PPCTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(JT);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue GA =
        DAG.getTargetJumpTable(JT->getIndex(), Ty, PPCII::MO_PCREL_FLAG);
    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual address of the GlobalValue is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
    return getTOCEntry(DAG, SDLoc(JT), GA);
  }

  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);

  // 32-bit SVR4 PIC: the jump-table address also goes through the TOC/GOT.
  if (IsPIC && Subtarget.isSVR4ABI()) {
    SDValue GA = DAG.getTargetJumpTable(JT->getIndex(), PtrVT,
    return getTOCEntry(DAG, SDLoc(GA), GA);
  }

  // Otherwise materialize the address directly as hi/lo halves.
  SDValue JTIHi = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOHiFlag);
  SDValue JTILo = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, MOLoFlag);
  return LowerLabelRef(JTIHi, JTILo, IsPIC, DAG);
}
3217 
SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  EVT PtrVT = Op.getValueType();
  BlockAddressSDNode *BASDN = cast<BlockAddressSDNode>(Op);
  const BlockAddress *BA = BASDN->getBlockAddress();

  // isUsingPCRelativeCalls() returns true when PCRelative is enabled
  if (Subtarget.isUsingPCRelativeCalls()) {
    SDLoc DL(BASDN);
    EVT Ty = getPointerTy(DAG.getDataLayout());
    SDValue GA = DAG.getTargetBlockAddress(BA, Ty, BASDN->getOffset(),
    SDValue MatAddr = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
    return MatAddr;
  }

  // 64-bit SVR4 ABI and AIX ABI code are always position-independent.
  // The actual BlockAddress is stored in the TOC.
  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
    setUsesTOCBasePtr(DAG);
    SDValue GA = DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset());
    return getTOCEntry(DAG, SDLoc(BASDN), GA);
  }

  // 32-bit position-independent ELF stores the BlockAddress in the .got.
  if (Subtarget.is32BitELFABI() && isPositionIndependent())
    return getTOCEntry(
        DAG, SDLoc(BASDN),
        DAG.getTargetBlockAddress(BA, PtrVT, BASDN->getOffset()));

  // Otherwise materialize the address directly as hi/lo halves.
  unsigned MOHiFlag, MOLoFlag;
  bool IsPIC = isPositionIndependent();
  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag);
  SDValue TgtBAHi = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOHiFlag);
  SDValue TgtBALo = DAG.getTargetBlockAddress(BA, PtrVT, 0, MOLoFlag);
  return LowerLabelRef(TgtBAHi, TgtBALo, IsPIC, DAG);
}
3255 
3256 SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
3257  SelectionDAG &DAG) const {
3258  if (Subtarget.isAIXABI())
3259  return LowerGlobalTLSAddressAIX(Op, DAG);
3260 
3261  return LowerGlobalTLSAddressLinux(Op, DAG);
3262 }
3263 
3264 SDValue PPCTargetLowering::LowerGlobalTLSAddressAIX(SDValue Op,
3265  SelectionDAG &DAG) const {
3266  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3267 
3268  if (DAG.getTarget().useEmulatedTLS())
3269  report_fatal_error("Emulated TLS is not yet supported on AIX");
3270 
3271  SDLoc dl(GA);
3272  const GlobalValue *GV = GA->getGlobal();
3273  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3274 
3275  // The general-dynamic model is the only access model supported for now, so
3276  // all the GlobalTLSAddress nodes are lowered with this model.
3277  // We need to generate two TOC entries, one for the variable offset, one for
3278  // the region handle. The global address for the TOC entry of the region
3279  // handle is created with the MO_TLSGDM_FLAG flag and the global address
3280  // for the TOC entry of the variable offset is created with MO_TLSGD_FLAG.
3281  SDValue VariableOffsetTGA =
3282  DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGD_FLAG);
3283  SDValue RegionHandleTGA =
3284  DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, PPCII::MO_TLSGDM_FLAG);
3285  SDValue VariableOffset = getTOCEntry(DAG, dl, VariableOffsetTGA);
3286  SDValue RegionHandle = getTOCEntry(DAG, dl, RegionHandleTGA);
3287  return DAG.getNode(PPCISD::TLSGD_AIX, dl, PtrVT, VariableOffset,
3288  RegionHandle);
3289 }
3290 
3291 SDValue PPCTargetLowering::LowerGlobalTLSAddressLinux(SDValue Op,
3292  SelectionDAG &DAG) const {
3293  // FIXME: TLS addresses currently use medium model code sequences,
3294  // which is the most useful form. Eventually support for small and
3295  // large models could be added if users need it, at the cost of
3296  // additional complexity.
3297  GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
3298  if (DAG.getTarget().useEmulatedTLS())
3299  return LowerToTLSEmulatedModel(GA, DAG);
3300 
3301  SDLoc dl(GA);
3302  const GlobalValue *GV = GA->getGlobal();
3303  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3304  bool is64bit = Subtarget.isPPC64();
3305  const Module *M = DAG.getMachineFunction().getFunction().getParent();
3306  PICLevel::Level picLevel = M->getPICLevel();
3307 
3308  const TargetMachine &TM = getTargetMachine();
3309  TLSModel::Model Model = TM.getTLSModel(GV);
3310 
3311  if (Model == TLSModel::LocalExec) {
3312  if (Subtarget.isUsingPCRelativeCalls()) {
3313  SDValue TLSReg = DAG.getRegister(PPC::X13, MVT::i64);
3314  SDValue TGA = DAG.getTargetGlobalAddress(
3315  GV, dl, PtrVT, 0, (PPCII::MO_PCREL_FLAG | PPCII::MO_TPREL_FLAG));
3316  SDValue MatAddr =
3317  DAG.getNode(PPCISD::TLS_LOCAL_EXEC_MAT_ADDR, dl, PtrVT, TGA);
3318  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TLSReg, MatAddr);
3319  }
3320 
3321  SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3323  SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3325  SDValue TLSReg = is64bit ? DAG.getRegister(PPC::X13, MVT::i64)
3326  : DAG.getRegister(PPC::R2, MVT::i32);
3327 
3328  SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
3329  return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
3330  }
3331 
3332  if (Model == TLSModel::InitialExec) {
3333  bool IsPCRel = Subtarget.isUsingPCRelativeCalls();
3334  SDValue TGA = DAG.getTargetGlobalAddress(
3335  GV, dl, PtrVT, 0, IsPCRel ? PPCII::MO_GOT_TPREL_PCREL_FLAG : 0);
3336  SDValue TGATLS = DAG.getTargetGlobalAddress(
3337  GV, dl, PtrVT, 0,
3339  SDValue TPOffset;
3340  if (IsPCRel) {
3341  SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, dl, PtrVT, TGA);
3342  TPOffset = DAG.getLoad(MVT::i64, dl, DAG.getEntryNode(), MatPCRel,
3343  MachinePointerInfo());
3344  } else {
3345  SDValue GOTPtr;
3346  if (is64bit) {
3347  setUsesTOCBasePtr(DAG);
3348  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3349  GOTPtr =
3350  DAG.getNode(PPCISD::ADDIS_GOT_TPREL_HA, dl, PtrVT, GOTReg, TGA);
3351  } else {
3352  if (!TM.isPositionIndependent())
3353  GOTPtr = DAG.getNode(PPCISD::PPC32_GOT, dl, PtrVT);
3354  else if (picLevel == PICLevel::SmallPIC)
3355  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3356  else
3357  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3358  }
3359  TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL_L, dl, PtrVT, TGA, GOTPtr);
3360  }
3361  return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TGATLS);
3362  }
3363 
3365  if (Subtarget.isUsingPCRelativeCalls()) {
3366  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3368  return DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3369  }
3370 
3371  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3372  SDValue GOTPtr;
3373  if (is64bit) {
3374  setUsesTOCBasePtr(DAG);
3375  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3376  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
3377  GOTReg, TGA);
3378  } else {
3379  if (picLevel == PICLevel::SmallPIC)
3380  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3381  else
3382  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3383  }
3384  return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
3385  GOTPtr, TGA, TGA);
3386  }
3387 
3388  if (Model == TLSModel::LocalDynamic) {
3389  if (Subtarget.isUsingPCRelativeCalls()) {
3390  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
3392  SDValue MatPCRel =
3393  DAG.getNode(PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR, dl, PtrVT, TGA);
3394  return DAG.getNode(PPCISD::PADDI_DTPREL, dl, PtrVT, MatPCRel, TGA);
3395  }
3396 
3397  SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
3398  SDValue GOTPtr;
3399  if (is64bit) {
3400  setUsesTOCBasePtr(DAG);
3401  SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
3402  GOTPtr = DAG.getNode(PPCISD::ADDIS_TLSLD_HA, dl, PtrVT,
3403  GOTReg, TGA);
3404  } else {
3405  if (picLevel == PICLevel::SmallPIC)
3406  GOTPtr = DAG.getNode(PPCISD::GlobalBaseReg, dl, PtrVT);
3407  else
3408  GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
3409  }
3410  SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
3411  PtrVT, GOTPtr, TGA, TGA);
3412  SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
3413  PtrVT, TLSAddr, TGA);
3414  return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
3415  }
3416 
3417  llvm_unreachable("Unknown TLS model!");
3418 }
3419 
3420 SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,
3421  SelectionDAG &DAG) const {
3422  EVT PtrVT = Op.getValueType();
3423  GlobalAddressSDNode *GSDN = cast<GlobalAddressSDNode>(Op);
3424  SDLoc DL(GSDN);
3425  const GlobalValue *GV = GSDN->getGlobal();
3426 
3427  // 64-bit SVR4 ABI & AIX ABI code is always position-independent.
3428  // The actual address of the GlobalValue is stored in the TOC.
3429  if (Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) {
3430  if (Subtarget.isUsingPCRelativeCalls()) {
3431  EVT Ty = getPointerTy(DAG.getDataLayout());
3432  if (isAccessedAsGotIndirect(Op)) {
3433  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3436  SDValue MatPCRel = DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3437  SDValue Load = DAG.getLoad(MVT::i64, DL, DAG.getEntryNode(), MatPCRel,
3438  MachinePointerInfo());
3439  return Load;
3440  } else {
3441  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, Ty, GSDN->getOffset(),
3443  return DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, Ty, GA);
3444  }
3445  }
3446  setUsesTOCBasePtr(DAG);
3447  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset());
3448  return getTOCEntry(DAG, DL, GA);
3449  }
3450 
3451  unsigned MOHiFlag, MOLoFlag;
3452  bool IsPIC = isPositionIndependent();
3453  getLabelAccessInfo(IsPIC, Subtarget, MOHiFlag, MOLoFlag, GV);
3454 
3455  if (IsPIC && Subtarget.isSVR4ABI()) {
3456  SDValue GA = DAG.getTargetGlobalAddress(GV, DL, PtrVT,
3457  GSDN->getOffset(),
3459  return getTOCEntry(DAG, DL, GA);
3460  }
3461 
3462  SDValue GAHi =
3463  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOHiFlag);
3464  SDValue GALo =
3465  DAG.getTargetGlobalAddress(GV, DL, PtrVT, GSDN->getOffset(), MOLoFlag);
3466 
3467  return LowerLabelRef(GAHi, GALo, IsPIC, DAG);
3468 }
3469 
3470 SDValue PPCTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
3471  bool IsStrict = Op->isStrictFPOpcode();
3472  ISD::CondCode CC =
3473  cast<CondCodeSDNode>(Op.getOperand(IsStrict ? 3 : 2))->get();
3474  SDValue LHS = Op.getOperand(IsStrict ? 1 : 0);
3475  SDValue RHS = Op.getOperand(IsStrict ? 2 : 1);
3476  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
3477  EVT LHSVT = LHS.getValueType();
3478  SDLoc dl(Op);
3479 
3480  // Soften the setcc with libcall if it is fp128.
3481  if (LHSVT == MVT::f128) {
3482  assert(!Subtarget.hasP9Vector() &&
3483  "SETCC for f128 is already legal under Power9!");
3484  softenSetCCOperands(DAG, LHSVT, LHS, RHS, CC, dl, LHS, RHS, Chain,
3485  Op->getOpcode() == ISD::STRICT_FSETCCS);
3486  if (RHS.getNode())
3487  LHS = DAG.getNode(ISD::SETCC, dl, Op.getValueType(), LHS, RHS,
3488  DAG.getCondCode(CC));
3489  if (IsStrict)
3490  return DAG.getMergeValues({LHS, Chain}, dl);
3491  return LHS;
3492  }
3493 
3494  assert(!IsStrict && "Don't know how to handle STRICT_FSETCC!");
3495 
3496  if (Op.getValueType() == MVT::v2i64) {
3497  // When the operands themselves are v2i64 values, we need to do something
3498  // special because VSX has no underlying comparison operations for these.
3499  if (LHS.getValueType() == MVT::v2i64) {
3500  // Equality can be handled by casting to the legal type for Altivec
3501  // comparisons, everything else needs to be expanded.
3502  if (CC != ISD::SETEQ && CC != ISD::SETNE)
3503  return SDValue();
3504  SDValue SetCC32 = DAG.getSetCC(
3505  dl, MVT::v4i32, DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, LHS),
3506  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, RHS), CC);
3507  int ShuffV[] = {1, 0, 3, 2};
3508  SDValue Shuff =
3509  DAG.getVectorShuffle(MVT::v4i32, dl, SetCC32, SetCC32, ShuffV);
3510  return DAG.getBitcast(MVT::v2i64,
3511  DAG.getNode(CC == ISD::SETEQ ? ISD::AND : ISD::OR,
3512  dl, MVT::v4i32, Shuff, SetCC32));
3513  }
3514 
3515  // We handle most of these in the usual way.
3516  return Op;
3517  }
3518 
3519  // If we're comparing for equality to zero, expose the fact that this is
3520  // implemented as a ctlz/srl pair on ppc, so that the dag combiner can
3521  // fold the new nodes.
3522  if (SDValue V = lowerCmpEqZeroToCtlzSrl(Op, DAG))
3523  return V;
3524 
3525  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS)) {
3526  // Leave comparisons against 0 and -1 alone for now, since they're usually
3527  // optimized. FIXME: revisit this when we can custom lower all setcc
3528  // optimizations.
3529  if (C->isAllOnes() || C->isZero())
3530  return SDValue();
3531  }
3532 
3533  // If we have an integer seteq/setne, turn it into a compare against zero
3534  // by xor'ing the rhs with the lhs, which is faster than setting a
3535  // condition register, reading it back out, and masking the correct bit. The
3536  // normal approach here uses sub to do this instead of xor. Using xor exposes
3537  // the result to other bit-twiddling opportunities.
3538  if (LHSVT.isInteger() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
3539  EVT VT = Op.getValueType();
3540  SDValue Sub = DAG.getNode(ISD::XOR, dl, LHSVT, LHS, RHS);
3541  return DAG.getSetCC(dl, VT, Sub, DAG.getConstant(0, dl, LHSVT), CC);
3542  }
3543  return SDValue();
3544 }
3545 
3546 SDValue PPCTargetLowering::LowerVAARG(SDValue Op, SelectionDAG &DAG) const {
3547  SDNode *Node = Op.getNode();
3548  EVT VT = Node->getValueType(0);
3549  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3550  SDValue InChain = Node->getOperand(0);
3551  SDValue VAListPtr = Node->getOperand(1);
3552  const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
3553  SDLoc dl(Node);
3554 
3555  assert(!Subtarget.isPPC64() && "LowerVAARG is PPC32 only");
3556 
3557  // gpr_index
3558  SDValue GprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3559  VAListPtr, MachinePointerInfo(SV), MVT::i8);
3560  InChain = GprIndex.getValue(1);
3561 
3562  if (VT == MVT::i64) {
3563  // Check if GprIndex is even
3564  SDValue GprAnd = DAG.getNode(ISD::AND, dl, MVT::i32, GprIndex,
3565  DAG.getConstant(1, dl, MVT::i32));
3566  SDValue CC64 = DAG.getSetCC(dl, MVT::i32, GprAnd,
3567  DAG.getConstant(0, dl, MVT::i32), ISD::SETNE);
3568  SDValue GprIndexPlusOne = DAG.getNode(ISD::ADD, dl, MVT::i32, GprIndex,
3569  DAG.getConstant(1, dl, MVT::i32));
3570  // Align GprIndex to be even if it isn't
3571  GprIndex = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC64, GprIndexPlusOne,
3572  GprIndex);
3573  }
3574 
3575  // fpr index is 1 byte after gpr
3576  SDValue FprPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3577  DAG.getConstant(1, dl, MVT::i32));
3578 
3579  // fpr
3580  SDValue FprIndex = DAG.getExtLoad(ISD::ZEXTLOAD, dl, MVT::i32, InChain,
3581  FprPtr, MachinePointerInfo(SV), MVT::i8);
3582  InChain = FprIndex.getValue(1);
3583 
3584  SDValue RegSaveAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3585  DAG.getConstant(8, dl, MVT::i32));
3586 
3587  SDValue OverflowAreaPtr = DAG.getNode(ISD::ADD, dl, PtrVT, VAListPtr,
3588  DAG.getConstant(4, dl, MVT::i32));
3589 
3590  // areas
3591  SDValue OverflowArea =
3592  DAG.getLoad(MVT::i32, dl, InChain, OverflowAreaPtr, MachinePointerInfo());
3593  InChain = OverflowArea.getValue(1);
3594 
3595  SDValue RegSaveArea =
3596  DAG.getLoad(MVT::i32, dl, InChain, RegSaveAreaPtr, MachinePointerInfo());
3597  InChain = RegSaveArea.getValue(1);
3598 
3599  // select overflow_area if index > 8
3600  SDValue CC = DAG.getSetCC(dl, MVT::i32, VT.isInteger() ? GprIndex : FprIndex,
3601  DAG.getConstant(8, dl, MVT::i32), ISD::SETLT);
3602 
3603  // adjustment constant gpr_index * 4/8
3604  SDValue RegConstant = DAG.getNode(ISD::MUL, dl, MVT::i32,
3605  VT.isInteger() ? GprIndex : FprIndex,
3606  DAG.getConstant(VT.isInteger() ? 4 : 8, dl,
3607  MVT::i32));
3608 
3609  // OurReg = RegSaveArea + RegConstant
3610  SDValue OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, RegSaveArea,
3611  RegConstant);
3612 
3613  // Floating types are 32 bytes into RegSaveArea
3614  if (VT.isFloatingPoint())
3615  OurReg = DAG.getNode(ISD::ADD, dl, PtrVT, OurReg,
3616  DAG.getConstant(32, dl, MVT::i32));
3617 
3618  // increase {f,g}pr_index by 1 (or 2 if VT is i64)
3619  SDValue IndexPlus1 = DAG.getNode(ISD::ADD, dl, MVT::i32,
3620  VT.isInteger() ? GprIndex : FprIndex,
3621  DAG.getConstant(VT == MVT::i64 ? 2 : 1, dl,
3622  MVT::i32));
3623 
3624  InChain = DAG.getTruncStore(InChain, dl, IndexPlus1,
3625  VT.isInteger() ? VAListPtr : FprPtr,
3627 
3628  // determine if we should load from reg_save_area or overflow_area
3629  SDValue Result = DAG.getNode(ISD::SELECT, dl, PtrVT, CC, OurReg, OverflowArea);
3630 
3631  // increase overflow_area by 4/8 if gpr/fpr > 8
3632  SDValue OverflowAreaPlusN = DAG.getNode(ISD::ADD, dl, PtrVT, OverflowArea,
3633  DAG.getConstant(VT.isInteger() ? 4 : 8,
3634  dl, MVT::i32));
3635 
3636  OverflowArea = DAG.getNode(ISD::SELECT, dl, MVT::i32, CC, OverflowArea,
3637  OverflowAreaPlusN);
3638 
3639  InChain = DAG.getTruncStore(InChain, dl, OverflowArea, OverflowAreaPtr,
3641 
3642  return DAG.getLoad(VT, dl, InChain, Result, MachinePointerInfo());
3643 }
3644 
3645 SDValue PPCTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
3646  assert(!Subtarget.isPPC64() && "LowerVACOPY is PPC32 only");
3647 
3648  // We have to copy the entire va_list struct:
3649  // 2*sizeof(char) + 2 Byte alignment + 2*sizeof(char*) = 12 Byte
3650  return DAG.getMemcpy(Op.getOperand(0), Op, Op.getOperand(1), Op.getOperand(2),
3651  DAG.getConstant(12, SDLoc(Op), MVT::i32), Align(8),
3652  false, true, false, MachinePointerInfo(),
3653  MachinePointerInfo());
3654 }
3655 
3656 SDValue PPCTargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
3657  SelectionDAG &DAG) const {
3658  if (Subtarget.isAIXABI())
3659  report_fatal_error("ADJUST_TRAMPOLINE operation is not supported on AIX.");
3660 
3661  return Op.getOperand(0);
3662 }
3663 
3664 SDValue PPCTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
3665  MachineFunction &MF = DAG.getMachineFunction();
3666  PPCFunctionInfo &MFI = *MF.getInfo<PPCFunctionInfo>();
3667 
3668  assert((Op.getOpcode() == ISD::INLINEASM ||
3669  Op.getOpcode() == ISD::INLINEASM_BR) &&
3670  "Expecting Inline ASM node.");
3671 
3672  // If an LR store is already known to be required then there is not point in
3673  // checking this ASM as well.
3674  if (MFI.isLRStoreRequired())
3675  return Op;
3676 
3677  // Inline ASM nodes have an optional last operand that is an incoming Flag of
3678  // type MVT::Glue. We want to ignore this last operand if that is the case.
3679  unsigned NumOps = Op.getNumOperands();
3680  if (Op.getOperand(NumOps - 1).getValueType() == MVT::Glue)
3681  --NumOps;
3682 
3683  // Check all operands that may contain the LR.
3684  for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
3685  unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
3686  unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
3687  ++i; // Skip the ID value.
3688 
3689  switch (InlineAsm::getKind(Flags)) {
3690  default:
3691  llvm_unreachable("Bad flags!");
3693  case InlineAsm::Kind_Imm:
3694  case InlineAsm::Kind_Mem:
3695  i += NumVals;
3696  break;
3700  for (; NumVals; --NumVals, ++i) {
3701  Register Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
3702  if (Reg != PPC::LR && Reg != PPC::LR8)
3703  continue;
3704  MFI.setLRStoreRequired();
3705  return Op;
3706  }
3707  break;
3708  }
3709  }
3710  }
3711 
3712  return Op;
3713 }
3714 
3715 SDValue PPCTargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
3716  SelectionDAG &DAG) const {
3717  if (Subtarget.isAIXABI())
3718  report_fatal_error("INIT_TRAMPOLINE operation is not supported on AIX.");
3719 
3720  SDValue Chain = Op.getOperand(0);
3721  SDValue Trmp = Op.getOperand(1); // trampoline
3722  SDValue FPtr = Op.getOperand(2); // nested function
3723  SDValue Nest = Op.getOperand(3); // 'nest' parameter value
3724  SDLoc dl(Op);
3725 
3726  EVT PtrVT = getPointerTy(DAG.getDataLayout());
3727  bool isPPC64 = (PtrVT == MVT::i64);
3728  Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
3729 
3731  TargetLowering::ArgListEntry Entry;
3732 
3733  Entry.Ty = IntPtrTy;
3734  Entry.Node = Trmp; Args.push_back(Entry);
3735 
3736  // TrampSize == (isPPC64 ? 48 : 40);
3737  Entry.Node = DAG.getConstant(isPPC64 ? 48 : 40, dl,
3738  isPPC64 ? MVT::i64 : MVT::i32);
3739  Args.push_back(Entry);
3740 
3741  Entry.Node = FPtr; Args.push_back(Entry);
3742  Entry.Node = Nest; Args.push_back(Entry);
3743 
3744  // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
3746  CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
3748  DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
3749 
3750  std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
3751  return CallResult.second;
3752 }
3753 
3754 SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
3755  MachineFunction &MF = DAG.getMachineFunction();
3756  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
3757  EVT PtrVT = getPointerTy(MF.getDataLayout());
3758 
3759  SDLoc dl(Op);
3760 
3761  if (Subtarget.isPPC64() || Subtarget.isAIXABI()) {
3762  // vastart just stores the address of the VarArgsFrameIndex slot into the
3763  // memory location argument.
3764  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
3765  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3766  return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1),
3767  MachinePointerInfo(SV));
3768  }
3769 
3770  // For the 32-bit SVR4 ABI we follow the layout of the va_list struct.
3771  // We suppose the given va_list is already allocated.
3772  //
3773  // typedef struct {
3774  // char gpr; /* index into the array of 8 GPRs
3775  // * stored in the register save area
3776  // * gpr=0 corresponds to r3,
3777  // * gpr=1 to r4, etc.
3778  // */
3779  // char fpr; /* index into the array of 8 FPRs
3780  // * stored in the register save area
3781  // * fpr=0 corresponds to f1,
3782  // * fpr=1 to f2, etc.
3783  // */
3784  // char *overflow_arg_area;
3785  // /* location on stack that holds
3786  // * the next overflow argument
3787  // */
3788  // char *reg_save_area;
3789  // /* where r3:r10 and f1:f8 (if saved)
3790  // * are stored
3791  // */
3792  // } va_list[1];
3793 
3794  SDValue ArgGPR = DAG.getConstant(FuncInfo->getVarArgsNumGPR(), dl, MVT::i32);
3795  SDValue ArgFPR = DAG.getConstant(FuncInfo->getVarArgsNumFPR(), dl, MVT::i32);
3796  SDValue StackOffsetFI = DAG.getFrameIndex(FuncInfo->getVarArgsStackOffset(),
3797  PtrVT);
3798  SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
3799  PtrVT);
3800 
3801  uint64_t FrameOffset = PtrVT.getSizeInBits()/8;
3802  SDValue ConstFrameOffset = DAG.getConstant(FrameOffset, dl, PtrVT);
3803 
3804  uint64_t StackOffset = PtrVT.getSizeInBits()/8 - 1;
3805  SDValue ConstStackOffset = DAG.getConstant(StackOffset, dl, PtrVT);
3806 
3807  uint64_t FPROffset = 1;
3808  SDValue ConstFPROffset = DAG.getConstant(FPROffset, dl, PtrVT);
3809 
3810  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
3811 
3812  // Store first byte : number of int regs
3813  SDValue firstStore =
3814  DAG.getTruncStore(Op.getOperand(0), dl, ArgGPR, Op.getOperand(1),
3816  uint64_t nextOffset = FPROffset;
3817  SDValue nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, Op.getOperand(1),
3818  ConstFPROffset);
3819 
3820  // Store second byte : number of float regs
3821  SDValue secondStore =
3822  DAG.getTruncStore(firstStore, dl, ArgFPR, nextPtr,
3823  MachinePointerInfo(SV, nextOffset), MVT::i8);
3824  nextOffset += StackOffset;
3825  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstStackOffset);
3826 
3827  // Store second word : arguments given on stack
3828  SDValue thirdStore = DAG.getStore(secondStore, dl, StackOffsetFI, nextPtr,
3829  MachinePointerInfo(SV, nextOffset));
3830  nextOffset += FrameOffset;
3831  nextPtr = DAG.getNode(ISD::ADD, dl, PtrVT, nextPtr, ConstFrameOffset);
3832 
3833  // Store third word : arguments given in registers
3834  return DAG.getStore(thirdStore, dl, FR, nextPtr,
3835  MachinePointerInfo(SV, nextOffset));
3836 }
3837 
/// FPR - The set of FP registers that should be allocated for arguments
/// on Darwin and AIX: F1 through F13, in allocation order.
static const MCPhysReg FPR[] = {PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5,
                                PPC::F6, PPC::F7, PPC::F8, PPC::F9, PPC::F10,
                                PPC::F11, PPC::F12, PPC::F13};
3843 
3844 /// CalculateStackSlotSize - Calculates the size reserved for this argument on
3845 /// the stack.
3846 static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags,
3847  unsigned PtrByteSize) {
3848  unsigned ArgSize = ArgVT.getStoreSize();
3849  if (Flags.isByVal())
3850  ArgSize = Flags.getByValSize();
3851 
3852  // Round up to multiples of the pointer size, except for array members,
3853  // which are always packed.
3854  if (!Flags.isInConsecutiveRegs())
3855  ArgSize = ((ArgSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3856 
3857  return ArgSize;
3858 }
3859 
3860 /// CalculateStackSlotAlignment - Calculates the alignment of this argument
3861 /// on the stack.
3863  ISD::ArgFlagsTy Flags,
3864  unsigned PtrByteSize) {
3865  Align Alignment(PtrByteSize);
3866 
3867  // Altivec parameters are padded to a 16 byte boundary.
3868  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3869  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3870  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3871  ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3872  Alignment = Align(16);
3873 
3874  // ByVal parameters are aligned as requested.
3875  if (Flags.isByVal()) {
3876  auto BVAlign = Flags.getNonZeroByValAlign();
3877  if (BVAlign > PtrByteSize) {
3878  if (BVAlign.value() % PtrByteSize != 0)
3880  "ByVal alignment is not a multiple of the pointer size");
3881 
3882  Alignment = BVAlign;
3883  }
3884  }
3885 
3886  // Array members are always packed to their original alignment.
3887  if (Flags.isInConsecutiveRegs()) {
3888  // If the array member was split into multiple registers, the first
3889  // needs to be aligned to the size of the full type. (Except for
3890  // ppcf128, which is only aligned as its f64 components.)
3891  if (Flags.isSplit() && OrigVT != MVT::ppcf128)
3892  Alignment = Align(OrigVT.getStoreSize());
3893  else
3894  Alignment = Align(ArgVT.getStoreSize());
3895  }
3896 
3897  return Alignment;
3898 }
3899 
3900 /// CalculateStackSlotUsed - Return whether this argument will use its
3901 /// stack slot (instead of being passed in registers). ArgOffset,
3902 /// AvailableFPRs, and AvailableVRs must hold the current argument
3903 /// position, and will be updated to account for this argument.
3904 static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags,
3905  unsigned PtrByteSize, unsigned LinkageSize,
3906  unsigned ParamAreaSize, unsigned &ArgOffset,
3907  unsigned &AvailableFPRs,
3908  unsigned &AvailableVRs) {
3909  bool UseMemory = false;
3910 
3911  // Respect alignment of argument on the stack.
3912  Align Alignment =
3913  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
3914  ArgOffset = alignTo(ArgOffset, Alignment);
3915  // If there's no space left in the argument save area, we must
3916  // use memory (this check also catches zero-sized arguments).
3917  if (ArgOffset >= LinkageSize + ParamAreaSize)
3918  UseMemory = true;
3919 
3920  // Allocate argument on the stack.
3921  ArgOffset += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
3922  if (Flags.isInConsecutiveRegsLast())
3923  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
3924  // If we overran the argument save area, we must use memory
3925  // (this check catches arguments passed partially in memory)
3926  if (ArgOffset > LinkageSize + ParamAreaSize)
3927  UseMemory = true;
3928 
3929  // However, if the argument is actually passed in an FPR or a VR,
3930  // we don't use memory after all.
3931  if (!Flags.isByVal()) {
3932  if (ArgVT == MVT::f32 || ArgVT == MVT::f64)
3933  if (AvailableFPRs > 0) {
3934  --AvailableFPRs;
3935  return false;
3936  }
3937  if (ArgVT == MVT::v4f32 || ArgVT == MVT::v4i32 ||
3938  ArgVT == MVT::v8i16 || ArgVT == MVT::v16i8 ||
3939  ArgVT == MVT::v2f64 || ArgVT == MVT::v2i64 ||
3940  ArgVT == MVT::v1i128 || ArgVT == MVT::f128)
3941  if (AvailableVRs > 0) {
3942  --AvailableVRs;
3943  return false;
3944  }
3945  }
3946 
3947  return UseMemory;
3948 }
3949 
3950 /// EnsureStackAlignment - Round stack frame size up from NumBytes to
3951 /// ensure minimum alignment required for target.
3953  unsigned NumBytes) {
3954  return alignTo(NumBytes, Lowering->getStackAlign());
3955 }
3956 
3957 SDValue PPCTargetLowering::LowerFormalArguments(
3958  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3959  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3960  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3961  if (Subtarget.isAIXABI())
3962  return LowerFormalArguments_AIX(Chain, CallConv, isVarArg, Ins, dl, DAG,
3963  InVals);
3964  if (Subtarget.is64BitELFABI())
3965  return LowerFormalArguments_64SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3966  InVals);
3967  assert(Subtarget.is32BitELFABI());
3968  return LowerFormalArguments_32SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG,
3969  InVals);
3970 }
3971 
3972 SDValue PPCTargetLowering::LowerFormalArguments_32SVR4(
3973  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
3974  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
3975  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
3976 
3977  // 32-bit SVR4 ABI Stack Frame Layout:
3978  // +-----------------------------------+
3979  // +--> | Back chain |
3980  // | +-----------------------------------+
3981  // | | Floating-point register save area |
3982  // | +-----------------------------------+
3983  // | | General register save area |
3984  // | +-----------------------------------+
3985  // | | CR save word |
3986  // | +-----------------------------------+
3987  // | | VRSAVE save word |
3988  // | +-----------------------------------+
3989  // | | Alignment padding |
3990  // | +-----------------------------------+
3991  // | | Vector register save area |
3992  // | +-----------------------------------+
3993  // | | Local variable space |
3994  // | +-----------------------------------+
3995  // | | Parameter list area |
3996  // | +-----------------------------------+
3997  // | | LR save word |
3998  // | +-----------------------------------+
3999  // SP--> +--- | Back chain |
4000  // +-----------------------------------+
4001  //
4002  // Specifications:
4003  // System V Application Binary Interface PowerPC Processor Supplement
4004  // AltiVec Technology Programming Interface Manual
4005 
4006  MachineFunction &MF = DAG.getMachineFunction();
4007  MachineFrameInfo &MFI = MF.getFrameInfo();
4008  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
4009 
4010  EVT PtrVT = getPointerTy(MF.getDataLayout());
4011  // Potential tail calls could cause overwriting of argument stack slots.
4012  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
4013  (CallConv == CallingConv::Fast));
4014  const Align PtrAlign(4);
4015 
4016  // Assign locations to all of the incoming arguments.
4018  PPCCCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
4019  *DAG.getContext());
4020 
4021  // Reserve space for the linkage area on the stack.
4022  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
4023  CCInfo.AllocateStack(LinkageSize, PtrAlign);
4024  if (useSoftFloat())
4025  CCInfo.PreAnalyzeFormalArguments(Ins);
4026 
4027  CCInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4);
4028  CCInfo.clearWasPPCF128();
4029 
4030  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
4031  CCValAssign &VA = ArgLocs[i];
4032 
4033  // Arguments stored in registers.
4034  if (VA.isRegLoc()) {
4035  const TargetRegisterClass *RC;
4036  EVT ValVT = VA.getValVT();
4037 
4038  switch (ValVT.getSimpleVT().SimpleTy) {
4039  default:
4040  llvm_unreachable("ValVT not supported by formal arguments Lowering");
4041  case MVT::i1:
4042  case MVT::i32:
4043  RC = &PPC::GPRCRegClass;
4044  break;
4045  case MVT::f32:
4046  if (Subtarget.hasP8Vector())
4047  RC = &PPC::VSSRCRegClass;
4048  else if (Subtarget.hasSPE())
4049  RC = &PPC::GPRCRegClass;
4050  else
4051  RC = &PPC::F4RCRegClass;
4052  break;
4053  case MVT::f64:
4054  if (Subtarget.hasVSX())
4055  RC = &PPC::VSFRCRegClass;
4056  else if (Subtarget.hasSPE())
4057  // SPE passes doubles in GPR pairs.
4058  RC = &PPC::GPRCRegClass;
4059  else
4060  RC = &PPC::F8RCRegClass;
4061  break;
4062  case MVT::v16i8:
4063  case MVT::v8i16:
4064  case MVT::v4i32:
4065  RC = &PPC::VRRCRegClass;
4066  break;
4067  case MVT::v4f32:
4068  RC = &PPC::VRRCRegClass;
4069  break;
4070  case MVT::v2f64:
4071  case MVT::v2i64:
4072  RC = &PPC::VRRCRegClass;
4073  break;
4074  }
4075 
4076  SDValue ArgValue;
4077  // Transform the arguments stored in physical registers into
4078  // virtual ones.
4079  if (VA.getLocVT() == MVT::f64 && Subtarget.hasSPE()) {
4080  assert(i + 1 < e && "No second half of double precision argument");
4081  Register RegLo = MF.addLiveIn(VA.getLocReg(), RC);
4082  Register RegHi = MF.addLiveIn(ArgLocs[++i].getLocReg(), RC);
4083  SDValue ArgValueLo = DAG.getCopyFromReg(Chain, dl, RegLo, MVT::i32);
4084  SDValue ArgValueHi = DAG.getCopyFromReg(Chain, dl, RegHi, MVT::i32);
4085  if (!Subtarget.isLittleEndian())
4086  std::swap (ArgValueLo, ArgValueHi);
4087  ArgValue = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, ArgValueLo,
4088  ArgValueHi);
4089  } else {
4090  Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
4091  ArgValue = DAG.getCopyFromReg(Chain, dl, Reg,
4092  ValVT == MVT::i1 ? MVT::i32 : ValVT);
4093  if (ValVT == MVT::i1)
4094  ArgValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, ArgValue);
4095  }
4096 
4097  InVals.push_back(ArgValue);
4098  } else {
4099  // Argument stored in memory.
4100  assert(VA.isMemLoc());
4101 
4102  // Get the extended size of the argument type in stack
4103  unsigned ArgSize = VA.getLocVT().getStoreSize();
4104  // Get the actual size of the argument type
4105  unsigned ObjSize = VA.getValVT().getStoreSize();
4106  unsigned ArgOffset = VA.getLocMemOffset();
4107  // Stack objects in PPC32 are right justified.
4108  ArgOffset += ArgSize - ObjSize;
4109  int FI = MFI.CreateFixedObject(ArgSize, ArgOffset, isImmutable);
4110 
4111  // Create load nodes to retrieve arguments from the stack.
4112  SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
4113  InVals.push_back(
4114  DAG.getLoad(VA.getValVT(), dl, Chain, FIN, MachinePointerInfo()));
4115  }
4116  }
4117 
4118  // Assign locations to all of the incoming aggregate by value arguments.
4119  // Aggregates passed by value are stored in the local variable space of the
4120  // caller's stack frame, right above the parameter list area.
4121  SmallVector<CCValAssign, 16> ByValArgLocs;
4122  CCState CCByValInfo(CallConv, isVarArg, DAG.getMachineFunction(),
4123  ByValArgLocs, *DAG.getContext());
4124 
4125  // Reserve stack space for the allocations in CCInfo.
4126  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
4127 
4128  CCByValInfo.AnalyzeFormalArguments(Ins, CC_PPC32_SVR4_ByVal);
4129 
4130  // Area that is at least reserved in the caller of this function.
4131  unsigned MinReservedArea = CCByValInfo.getNextStackOffset();
4132  MinReservedArea = std::max(MinReservedArea, LinkageSize);
4133 
4134  // Set the size that is at least reserved in caller of this function. Tail
4135  // call optimized function's reserved stack space needs to be aligned so that
4136  // taking the difference between two stack areas will result in an aligned
4137  // stack.
4138  MinReservedArea =
4139  EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
4140  FuncInfo->setMinReservedArea(MinReservedArea);
4141 
4142  SmallVector<SDValue, 8> MemOps;
4143 
4144  // If the function takes variable number of arguments, make a frame index for
4145  // the start of the first vararg value... for expansion of llvm.va_start.
4146  if (isVarArg) {
4147  static const MCPhysReg GPArgRegs[] = {
4148  PPC::R3, PPC::R4, PPC::R5, PPC::R6,
4149  PPC::R7, PPC::R8, PPC::R9, PPC::R10,
4150  };
4151  const unsigned NumGPArgRegs = array_lengthof(GPArgRegs);
4152 
4153  static const MCPhysReg FPArgRegs[] = {
4154  PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7,
4155  PPC::F8
4156  };
4157  unsigned NumFPArgRegs = array_lengthof(FPArgRegs);
4158 
4159  if (useSoftFloat() || hasSPE())
4160  NumFPArgRegs = 0;
4161 
4162  FuncInfo->setVarArgsNumGPR(CCInfo.getFirstUnallocated(GPArgRegs));
4163  FuncInfo->setVarArgsNumFPR(CCInfo.getFirstUnallocated(FPArgRegs));
4164 
4165  // Make room for NumGPArgRegs and NumFPArgRegs.
4166  int Depth = NumGPArgRegs * PtrVT.getSizeInBits()/8 +
4167  NumFPArgRegs * MVT(MVT::f64).getSizeInBits()/8;
4168 
4169  FuncInfo->setVarArgsStackOffset(
4170  MFI.CreateFixedObject(PtrVT.getSizeInBits()/8,
4171  CCInfo.getNextStackOffset(), true));
4172 
4173  FuncInfo->setVarArgsFrameIndex(
4174  MFI.CreateStackObject(Depth, Align(8), false));
4175  SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);
4176 
4177  // The fixed integer arguments of a variadic function are stored to the
4178  // VarArgsFrameIndex on the stack so that they may be loaded by
4179  // dereferencing the result of va_next.
4180  for (unsigned GPRIndex = 0; GPRIndex != NumGPArgRegs; ++GPRIndex) {
4181  // Get an existing live-in vreg, or add a new one.
4182  Register VReg = MF.getRegInfo().getLiveInVirtReg(GPArgRegs[GPRIndex]);
4183  if (!VReg)
4184  VReg = MF.addLiveIn(GPArgRegs[GPRIndex], &PPC::GPRCRegClass);
4185 
4186  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
4187  SDValue Store =
4188  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4189  MemOps.push_back(Store);
4190  // Increment the address by four for the next argument to store
4191  SDValue PtrOff = DAG.getConstant(PtrVT.getSizeInBits()/8, dl, PtrVT);
4192  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4193  }
4194 
4195  // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6
4196  // is set.
4197  // The double arguments are stored to the VarArgsFrameIndex
4198  // on the stack.
4199  for (unsigned FPRIndex = 0; FPRIndex != NumFPArgRegs; ++FPRIndex) {
4200  // Get an existing live-in vreg, or add a new one.
4201  Register VReg = MF.getRegInfo().getLiveInVirtReg(FPArgRegs[FPRIndex]);
4202  if (!VReg)
4203  VReg = MF.addLiveIn(FPArgRegs[FPRIndex], &PPC::F8RCRegClass);
4204 
4205  SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, MVT::f64);
4206  SDValue Store =
4207  DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
4208  MemOps.push_back(Store);
4209  // Increment the address by eight for the next argument to store
4210  SDValue PtrOff = DAG.getConstant(MVT(MVT::f64).getSizeInBits()/8, dl,
4211  PtrVT);
4212  FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
4213  }
4214  }
4215 
4216  if (!MemOps.empty())
4217  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);
4218 
4219  return Chain;
4220 }
4221 
4222 // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
4223 // value to MVT::i64 and then truncate to the correct register size.
4224 SDValue PPCTargetLowering::extendArgForPPC64(ISD::ArgFlagsTy Flags,
4225  EVT ObjectVT, SelectionDAG &DAG,
4226  SDValue ArgVal,
4227  const SDLoc &dl) const {
4228  if (Flags.isSExt())
4229  ArgVal = DAG.getNode(ISD::AssertSext, dl, MVT::i64, ArgVal,
4230  DAG.getValueType(ObjectVT));
4231  else if (Flags.isZExt())
4232  ArgVal = DAG.getNode(ISD::AssertZext, dl, MVT::i64, ArgVal,
4233  DAG.getValueType(ObjectVT));
4234 
4235  return DAG.getNode(ISD::TRUNCATE, dl, ObjectVT, ArgVal);
4236 }
4237 
/// Lower incoming (formal) arguments under the 64-bit SVR4 ABIs (ELFv1 and
/// ELFv2). Arguments arrive in GPRs (X3-X10), FPRs, and VRs, and/or in the
/// caller's parameter save area that follows the linkage area. For each
/// incoming argument this emits CopyFromReg and/or load nodes and appends
/// the resulting SDValue to \p InVals; it also records the minimum reserved
/// stack area and sets up vararg spill slots when needed.
SDValue PPCTargetLowering::LowerFormalArguments_64SVR4(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  // TODO: add description of PPC stack frame format, or at least some docs.
  //
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  bool isLittleEndian = Subtarget.isLittleEndian();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();

  assert(!(CallConv == CallingConv::Fast && isVarArg) &&
         "fastcc not supported on varargs functions");

  EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Potential tail calls could cause overwriting of argument stack slots.
  bool isImmutable = !(getTargetMachine().Options.GuaranteedTailCallOpt &&
                       (CallConv == CallingConv::Fast));
  unsigned PtrByteSize = 8;
  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned Num_GPR_Regs = array_lengthof(GPR);
  const unsigned Num_FPR_Regs = useSoftFloat() ? 0 : 13;
  const unsigned Num_VR_Regs = array_lengthof(VR);

  // Do a first pass over the arguments to determine whether the ABI
  // guarantees that our caller has allocated the parameter save area
  // on its stack frame. In the ELFv1 ABI, this is always the case;
  // in the ELFv2 ABI, it is true if this is a vararg function or if
  // any parameter is located in a stack slot.

  bool HasParameterArea = !isELFv2ABI || isVarArg;
  unsigned ParamAreaSize = Num_GPR_Regs * PtrByteSize;
  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = Num_FPR_Regs;
  unsigned AvailableVRs = Num_VR_Regs;
  for (unsigned i = 0, e = Ins.size(); i != e; ++i) {
    if (Ins[i].Flags.isNest())
      continue;

    // CalculateStackSlotUsed returns true when this argument needs a stack
    // slot (it also updates the running register/byte counters in place).
    if (CalculateStackSlotUsed(Ins[i].VT, Ins[i].ArgVT, Ins[i].Flags,
                               PtrByteSize, LinkageSize, ParamAreaSize,
                               NumBytes, AvailableFPRs, AvailableVRs))
      HasParameterArea = true;
  }

  // Add DAG nodes to load the arguments or copy them out of registers. On
  // entry to a function on PPC, the arguments start after the linkage area,
  // although the first ones are often in registers.

  unsigned ArgOffset = LinkageSize;
  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
  SmallVector<SDValue, 8> MemOps;
  // FuncArg tracks the IR-level formal argument that corresponds to the
  // current lowered argument piece (advanced via getOrigArgIndex below).
  Function::const_arg_iterator FuncArg = MF.getFunction().arg_begin();
  unsigned CurArgIdx = 0;
  for (unsigned ArgNo = 0, e = Ins.size(); ArgNo != e; ++ArgNo) {
    SDValue ArgVal;
    bool needsLoad = false;
    EVT ObjectVT = Ins[ArgNo].VT;
    EVT OrigVT = Ins[ArgNo].ArgVT;
    unsigned ObjSize = ObjectVT.getStoreSize();
    unsigned ArgSize = ObjSize;
    ISD::ArgFlagsTy Flags = Ins[ArgNo].Flags;
    if (Ins[ArgNo].isOrigArg()) {
      std::advance(FuncArg, Ins[ArgNo].getOrigArgIndex() - CurArgIdx);
      CurArgIdx = Ins[ArgNo].getOrigArgIndex();
    }
    // We re-align the argument offset for each argument, except when using the
    // fast calling convention, when we need to make sure we do that only when
    // we'll actually use a stack slot.
    unsigned CurArgOffset;
    Align Alignment;
    auto ComputeArgOffset = [&]() {
      /* Respect alignment of argument on the stack. */
      Alignment =
          CalculateStackSlotAlignment(ObjectVT, OrigVT, Flags, PtrByteSize);
      ArgOffset = alignTo(ArgOffset, Alignment);
      CurArgOffset = ArgOffset;
    };

    if (CallConv != CallingConv::Fast) {
      ComputeArgOffset();

      /* Compute GPR index associated with argument offset. */
      GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
      GPR_idx = std::min(GPR_idx, Num_GPR_Regs);
    }

    // FIXME the codegen can be much improved in some cases.
    // We do not have to keep everything in memory.
    if (Flags.isByVal()) {
      assert(Ins[ArgNo].isOrigArg() && "Byval arguments cannot be implicit");

      if (CallConv == CallingConv::Fast)
        ComputeArgOffset();

      // ObjSize is the true size, ArgSize rounded up to multiple of registers.
      ObjSize = Flags.getByValSize();
      ArgSize = ((ObjSize + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      // Empty aggregate parameters do not take up registers.  Examples:
      //   struct { } a;
      //   union  { } b;
      //   int c[0];
      // etc.  However, we have to provide a place-holder in InVals, so
      // pretend we have an 8-byte item at the current address for that
      // purpose.
      if (!ObjSize) {
        int FI = MFI.CreateFixedObject(PtrByteSize, ArgOffset, true);
        SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
        InVals.push_back(FIN);
        continue;
      }

      // Create a stack object covering all stack doublewords occupied
      // by the argument.  If the argument is (fully or partially) on
      // the stack, or if the argument is fully in registers but the
      // caller has allocated the parameter save anyway, we can refer
      // directly to the caller's stack frame.  Otherwise, create a
      // local copy in our own frame.
      int FI;
      if (HasParameterArea ||
          ArgSize + ArgOffset > LinkageSize + Num_GPR_Regs * PtrByteSize)
        FI = MFI.CreateFixedObject(ArgSize, ArgOffset, false, true);
      else
        FI = MFI.CreateStackObject(ArgSize, Alignment, false);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);

      // Handle aggregates smaller than 8 bytes.
      if (ObjSize < PtrByteSize) {
        // The value of the object is its address, which differs from the
        // address of the enclosing doubleword on big-endian systems.
        SDValue Arg = FIN;
        if (!isLittleEndian) {
          // Big-endian: the data sits in the high-order bytes of the
          // doubleword, so bias the address by the unused low bytes.
          SDValue ArgOff = DAG.getConstant(PtrByteSize - ObjSize, dl, PtrVT);
          Arg = DAG.getNode(ISD::ADD, dl, ArgOff.getValueType(), Arg, ArgOff);
        }
        InVals.push_back(Arg);

        if (GPR_idx != Num_GPR_Regs) {
          Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
          FuncInfo->addLiveInAttr(VReg, Flags);
          SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
          // Store only ObjSize bytes of the register (truncating store).
          EVT ObjType = EVT::getIntegerVT(*DAG.getContext(), ObjSize * 8);
          SDValue Store =
              DAG.getTruncStore(Val.getValue(1), dl, Val, Arg,
                                MachinePointerInfo(&*FuncArg), ObjType);
          MemOps.push_back(Store);
        }
        // Whether we copied from a register or not, advance the offset
        // into the parameter save area by a full doubleword.
        ArgOffset += PtrByteSize;
        continue;
      }

      // The value of the object is its address, which is the address of
      // its first stack doubleword.
      InVals.push_back(FIN);

      // Store whatever pieces of the object are in registers to memory.
      for (unsigned j = 0; j < ArgSize; j += PtrByteSize) {
        if (GPR_idx == Num_GPR_Regs)
          break;

        Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
        SDValue Addr = FIN;
        if (j) {
          SDValue Off = DAG.getConstant(j, dl, PtrVT);
          Addr = DAG.getNode(ISD::ADD, dl, Off.getValueType(), Addr, Off);
        }
        SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, Addr,
                                     MachinePointerInfo(&*FuncArg, j));
        MemOps.push_back(Store);
        ++GPR_idx;
      }
      ArgOffset += ArgSize;
      continue;
    }

    switch (ObjectVT.getSimpleVT().SimpleTy) {
    default: llvm_unreachable("Unhandled argument type!");
    case MVT::i1:
    case MVT::i32:
    case MVT::i64:
      if (Flags.isNest()) {
        // The 'nest' parameter, if any, is passed in R11.
        Register VReg = MF.addLiveIn(PPC::X11, &PPC::G8RCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);

        break;
      }

      // These can be scalar arguments or elements of an integer array type
      // passed directly.  Clang may use those instead of "byval" aggregate
      // types to avoid forcing arguments to memory unnecessarily.
      if (GPR_idx != Num_GPR_Regs) {
        Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::i32 || ObjectVT == MVT::i1)
          // PPC64 passes i8, i16, and i32 values in i64 registers. Promote
          // value to MVT::i64 and then truncate to the correct register size.
          ArgVal = extendArgForPPC64(Flags, ObjectVT, DAG, ArgVal, dl);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
        ArgSize = PtrByteSize;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 8;
      break;

    case MVT::f32:
    case MVT::f64:
      // These can be scalar arguments or elements of a float array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // float aggregates.
      if (FPR_idx != Num_FPR_Regs) {
        unsigned VReg;

        if (ObjectVT == MVT::f32)
          VReg = MF.addLiveIn(FPR[FPR_idx],
                              Subtarget.hasP8Vector()
                                  ? &PPC::VSSRCRegClass
                                  : &PPC::F4RCRegClass);
        else
          VReg = MF.addLiveIn(FPR[FPR_idx], Subtarget.hasVSX()
                                                ? &PPC::VSFRCRegClass
                                                : &PPC::F8RCRegClass);

        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++FPR_idx;
      } else if (GPR_idx != Num_GPR_Regs && CallConv != CallingConv::Fast) {
        // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
        // once we support fp <-> gpr moves.

        // This can only ever happen in the presence of f32 array types,
        // since otherwise we never run out of FPRs before running out
        // of GPRs.
        Register VReg = MF.addLiveIn(GPR[GPR_idx++], &PPC::G8RCRegClass);
        FuncInfo->addLiveInAttr(VReg, Flags);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, MVT::i64);

        if (ObjectVT == MVT::f32) {
          // Extract the correct half of the doubleword: on LE the f32 bits
          // of an even-offset element are in the high half of the GPR.
          if ((ArgOffset % PtrByteSize) == (isLittleEndian ? 4 : 0))
            ArgVal = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgVal,
                                 DAG.getConstant(32, dl, MVT::i32));
          ArgVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, ArgVal);
        }

        ArgVal = DAG.getNode(ISD::BITCAST, dl, ObjectVT, ArgVal);
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();

        needsLoad = true;
      }

      // When passing an array of floats, the array occupies consecutive
      // space in the argument area; only round up to the next doubleword
      // at the end of the array.  Otherwise, each float takes 8 bytes.
      if (CallConv != CallingConv::Fast || needsLoad) {
        ArgSize = Flags.isInConsecutiveRegs() ? ObjSize : PtrByteSize;
        ArgOffset += ArgSize;
        if (Flags.isInConsecutiveRegsLast())
          ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
      }
      break;
    case MVT::v4f32:
    case MVT::v4i32:
    case MVT::v8i16:
    case MVT::v16i8:
    case MVT::v2f64:
    case MVT::v2i64:
    case MVT::v1i128:
    case MVT::f128:
      // These can be scalar arguments or elements of a vector array type
      // passed directly.  The latter are used to implement ELFv2 homogenous
      // vector aggregates.
      if (VR_idx != Num_VR_Regs) {
        Register VReg = MF.addLiveIn(VR[VR_idx], &PPC::VRRCRegClass);
        ArgVal = DAG.getCopyFromReg(Chain, dl, VReg, ObjectVT);
        ++VR_idx;
      } else {
        if (CallConv == CallingConv::Fast)
          ComputeArgOffset();
        needsLoad = true;
      }
      if (CallConv != CallingConv::Fast || needsLoad)
        ArgOffset += 16;
      break;
    }

    // We need to load the argument to a virtual register if we determined
    // above that we ran out of physical registers of the appropriate type.
    if (needsLoad) {
      if (ObjSize < ArgSize && !isLittleEndian)
        CurArgOffset += ArgSize - ObjSize;
      int FI = MFI.CreateFixedObject(ObjSize, CurArgOffset, isImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, MachinePointerInfo());
    }

    InVals.push_back(ArgVal);
  }

  // Area that is at least reserved in the caller of this function.
  unsigned MinReservedArea;
  if (HasParameterArea)
    MinReservedArea = std::max(ArgOffset, LinkageSize + 8 * PtrByteSize);
  else
    MinReservedArea = LinkageSize;

  // Set the size that is at least reserved in caller of this function.  Tail
  // call optimized functions' reserved stack space needs to be aligned so that
  // taking the difference between two stack areas will result in an aligned
  // stack.
  MinReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), MinReservedArea);
  FuncInfo->setMinReservedArea(MinReservedArea);

  // If the function takes variable number of arguments, make a frame index for
  // the start of the first vararg value... for expansion of llvm.va_start.
  // On ELFv2ABI spec, it writes:
  // C programs that are intended to be *portable* across different compilers
  // and architectures must use the header file <stdarg.h> to deal with variable
  // argument lists.
  if (isVarArg && MFI.hasVAStart()) {
    int Depth = ArgOffset;

    FuncInfo->setVarArgsFrameIndex(
      MFI.CreateFixedObject(PtrByteSize, Depth, true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    // If this function is vararg, store any remaining integer argument regs
    // to their spots on the stack so that they may be loaded by dereferencing
    // the result of va_next.
    for (GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
         GPR_idx < Num_GPR_Regs; ++GPR_idx) {
      Register VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::G8RCRegClass);
      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address by four for the next argument to store
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
4610 
/// CalculateTailCallSPDiff - Get the amount the stack pointer has to be
/// adjusted to accommodate the arguments for the tailcall.
/// Returns CallerMinReservedArea - ParamSize (may be negative when the
/// callee needs more parameter space than the caller reserved), and records
/// the most negative adjustment seen so far in PPCFunctionInfo.
static int CalculateTailCallSPDiff(SelectionDAG& DAG, bool isTailCall,
                                   unsigned ParamSize) {

  // Non-tail calls need no stack-pointer adjustment.
  if (!isTailCall) return 0;

  PPCFunctionInfo *FI = DAG.getMachineFunction().getInfo<PPCFunctionInfo>();
  unsigned CallerMinReservedArea = FI->getMinReservedArea();
  int SPDiff = (int)CallerMinReservedArea - (int)ParamSize;
  // Remember only if the new adjustment is bigger.
  if (SPDiff < FI->getTailCallSPDelta())
    FI->setTailCallSPDelta(SPDiff);

  return SPDiff;
}
4627 
4628 static bool isFunctionGlobalAddress(SDValue Callee);
4629 
4630 static bool callsShareTOCBase(const Function *Caller, SDValue Callee,
4631  const TargetMachine &TM) {
4632  // It does not make sense to call callsShareTOCBase() with a caller that
4633  // is PC Relative since PC Relative callers do not have a TOC.
4634 #ifndef NDEBUG
4635  const PPCSubtarget *STICaller = &TM.getSubtarget<PPCSubtarget>(*Caller);
4636  assert(!STICaller->isUsingPCRelativeCalls() &&
4637  "PC Relative callers do not have a TOC and cannot share a TOC Base");
4638 #endif
4639 
4640  // Callee is either a GlobalAddress or an ExternalSymbol. ExternalSymbols
4641  // don't have enough information to determine if the caller and callee share
4642  // the same TOC base, so we have to pessimistically assume they don't for
4643  // correctness.
4644  GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
4645  if (!G)
4646  return false;
4647 
4648  const GlobalValue *GV = G->getGlobal();
4649 
4650  // If the callee is preemptable, then the static linker will use a plt-stub
4651  // which saves the toc to the stack, and needs a nop after the call
4652  // instruction to convert to a toc-restore.
4653  if (!TM.shouldAssumeDSOLocal(*Caller->getParent(), GV))
4654  return false;
4655 
4656  // Functions with PC Relative enabled may clobber the TOC in the same DSO.
4657  // We may need a TOC restore in the situation where the caller requires a
4658  // valid TOC but the callee is PC Relative and does not.
4659  const Function *F = dyn_cast<Function>(GV);
4660  const GlobalAlias *Alias = dyn_cast<GlobalAlias>(GV);
4661 
4662  // If we have an Alias we can try to get the function from there.
4663  if (Alias) {
4664  const GlobalObject *GlobalObj = Alias->getAliaseeObject();
4665  F = dyn_cast<Function>(GlobalObj);
4666  }
4667 
4668  // If we still have no valid function pointer we do not have enough
4669  // information to determine if the callee uses PC Relative calls so we must
4670  // assume that it does.
4671  if (!F)
4672  return false;
4673 
4674  // If the callee uses PC Relative we cannot guarantee that the callee won't
4675  // clobber the TOC of the caller and so we must assume that the two
4676  // functions do not share a TOC base.
4677  const PPCSubtarget *STICallee = &TM.getSubtarget<PPCSubtarget>(*F);
4678  if (STICallee->isUsingPCRelativeCalls())
4679  return false;
4680 
4681  // If the GV is not a strong definition then we need to assume it can be
4682  // replaced by another function at link time. The function that replaces
4683  // it may not share the same TOC as the caller since the callee may be
4684  // replaced by a PC Relative version of the same function.
4685  if (!GV->isStrongDefinitionForLinker())
4686  return false;
4687 
4688  // The medium and large code models are expected to provide a sufficiently
4689  // large TOC to provide all data addressing needs of a module with a
4690  // single TOC.
4691  if (CodeModel::Medium == TM.getCodeModel() ||
4692  CodeModel::Large == TM.getCodeModel())
4693  return true;
4694 
4695  // Any explicitly-specified sections and section prefixes must also match.
4696  // Also, if we're using -ffunction-sections, then each function is always in
4697  // a different section (the same is true for COMDAT functions).
4698  if (TM.getFunctionSections() || GV->hasComdat() || Caller->hasComdat() ||
4699  GV->getSection() != Caller->getSection())
4700  return false;
4701  if (const auto *F = dyn_cast<Function>(GV)) {
4702  if (F->getSectionPrefix() != Caller->getSectionPrefix())
4703  return false;
4704  }
4705 
4706  return true;
4707 }
4708 
/// Return true if any of the outgoing parameters in \p Outs would need a
/// stack slot in the 64-bit ELF parameter save area (i.e. the available
/// GPRs/FPRs/VRs are exhausted at some point); false if everything fits in
/// registers.
static bool
needStackSlotPassParameters(const PPCSubtarget &Subtarget,
                            const SmallVectorImpl<ISD::OutputArg> &Outs) {
  assert(Subtarget.is64BitELFABI());

  const unsigned PtrByteSize = 8;
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();

  static const MCPhysReg GPR[] = {
    PPC::X3, PPC::X4, PPC::X5, PPC::X6,
    PPC::X7, PPC::X8, PPC::X9, PPC::X10,
  };
  static const MCPhysReg VR[] = {
    PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
    PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
  };

  const unsigned NumGPRs = array_lengthof(GPR);
  const unsigned NumFPRs = 13;
  const unsigned NumVRs = array_lengthof(VR);
  const unsigned ParamAreaSize = NumGPRs * PtrByteSize;

  unsigned NumBytes = LinkageSize;
  unsigned AvailableFPRs = NumFPRs;
  unsigned AvailableVRs = NumVRs;

  for (const ISD::OutputArg& Param : Outs) {
    // The 'nest' parameter is passed in a dedicated register (R11), so it
    // never consumes a parameter save area slot.
    if (Param.Flags.isNest()) continue;

    if (CalculateStackSlotUsed(Param.VT, Param.ArgVT, Param.Flags, PtrByteSize,
                               LinkageSize, ParamAreaSize, NumBytes,
                               AvailableFPRs, AvailableVRs))
      return true;
  }
  return false;
}
4745 
4746 static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB) {
4747  if (CB.arg_size() != CallerFn->arg_size())
4748  return false;
4749 
4750  auto CalleeArgIter = CB.arg_begin();
4751  auto CalleeArgEnd = CB.arg_end();
4752  Function::const_arg_iterator CallerArgIter = CallerFn->arg_begin();
4753 
4754  for (; CalleeArgIter != CalleeArgEnd; ++CalleeArgIter, ++CallerArgIter) {
4755  const Value* CalleeArg = *CalleeArgIter;
4756  const Value* CallerArg = &(*CallerArgIter);
4757  if (CalleeArg == CallerArg)
4758  continue;
4759 
4760  // e.g. @caller([4 x i64] %a, [4 x i64] %b) {
4761  // tail call @callee([4 x i64] undef, [4 x i64] %b)
4762  // }
4763  // 1st argument of callee is undef and has the same type as caller.
4764  if (CalleeArg->getType() == CallerArg->getType() &&
4765  isa<UndefValue>(CalleeArg))
4766  continue;
4767 
4768  return false;
4769  }
4770 
4771  return true;
4772 }
4773 
// Returns true if TCO is possible between the callers and callees
// calling conventions.
static bool
areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC,
                                    CallingConv::ID CalleeCC) {
  // Tail calls are possible with fastcc and ccc.
  auto isTailCallableCC  = [] (CallingConv::ID CC){
      return  CC == CallingConv::C || CC == CallingConv::Fast;
  };
  if (!isTailCallableCC(CallerCC) || !isTailCallableCC(CalleeCC))
    return false;

  // We can safely tail call both fastcc and ccc callees from a c calling
  // convention caller. If the caller is fastcc, we may have less stack space
  // than a non-fastcc caller with the same signature so disable tail-calls in
  // that case.
  return CallerCC == CallingConv::C || CallerCC == CalleeCC;
}
4792 
4793 bool PPCTargetLowering::IsEligibleForTailCallOptimization_64SVR4(
4794  SDValue Callee, CallingConv::ID CalleeCC, const CallBase *CB, bool isVarArg,
4795  const SmallVectorImpl<ISD::OutputArg> &Outs,
4796  const SmallVectorImpl<ISD::InputArg> &Ins, SelectionDAG &DAG) const {
4797  bool TailCallOpt = getTargetMachine().Options.GuaranteedTailCallOpt;
4798 
4799  if (DisableSCO && !TailCallOpt) return false;
4800 
4801  // Variadic argument functions are not supported.
4802  if (isVarArg) return false;
4803 
4804  auto &Caller = DAG.getMachineFunction().getFunction();
4805  // Check that the calling conventions are compatible for tco.
4806  if (!areCallingConvEligibleForTCO_64SVR4(Caller.getCallingConv(), CalleeCC))
4807  return false;
4808 
4809  // Caller contains any byval parameter is not supported.
4810  if (any_of(Ins, [](const ISD::InputArg &IA) { return IA.Flags.isByVal(); }))
4811  return false;
4812 
4813  // Callee contains any byval parameter is not supported, too.
4814  // Note: This is a quick work around, because in some cases, e.g.
4815  // caller's stack size > callee's stack size, we are still able to apply
4816  // sibling call optimization. For example, gcc is able to do SCO for caller1
4817  // in the following example, but not for caller2.
4818  // struct test {
4819  // long int a;
4820  // char ary[56];
4821  // } gTest;
4822  // __attribute__((noinline)) int callee(struct test v, struct test *b) {
4823  // b->a = v.a;
4824  // return 0;
4825  // }
4826  // void caller1(struct test a, struct test c, struct test *b) {
4827  // callee(gTest, b); }
4828  // void caller2(struct test *b) { callee(gTest, b); }
4829  if (any_of(Outs, [](const ISD::OutputArg& OA) { return OA.Flags.isByVal(); }))
4830  return false;
4831 
4832  // If callee and caller use different calling conventions, we cannot pass
4833  // parameters on stack since offsets for the parameter area may be different.
4834  if (Caller.getCallingConv() != CalleeCC &&
4835  needStackSlotPassParameters(Subtarget, Outs))
4836  return false;
4837 
4838  // All variants of 64-bit ELF ABIs without PC-Relative addressing require that
4839  // the caller and callee share the same TOC for TCO/SCO. If the caller and
4840  // callee potentially have different TOC bases then we cannot tail call since
4841  // we need to restore the TOC pointer after the call.
4842  // ref: https://bugzilla.mozilla.org/show_bug.cgi?id=973977
4843  // We cannot guarantee this for indirect calls or calls to external functions.
4844  // When PC-Relative addressing is used, the concept of the TOC is no longer
4845  // applicable so this check is not required.
4846  // Check first for indirect calls.
4847  if (!Subtarget.isUsingPCRelativeCalls() &&
4848  !isFunctionGlobalAddress(Callee) && !isa<ExternalSymbolSDNode>(Callee))
4849  return false;
4850 
4851  // Check if we share the TOC base.
4852  if (!Subtarget.isUsingPCRelativeCalls() &&
4853  !callsShareTOCBase(&Caller, Callee, getTargetMachine()))
4854  return false;
4855 
4856  // TCO allows altering callee ABI, so we don't have to check further.
4857  if (CalleeCC == CallingConv::Fast && TailCallOpt)
4858  return true;
4859 
4860  if (DisableSCO) return false;
4861 
4862  // If callee use the same argument list that caller is using, then we can
4863  // apply SCO on this case. If it is not, then we need to check if callee needs
4864  // stack for passing arguments.
4865  // PC Relative tail calls may not have a CallBase.
4866  // If there is no CallBase we cannot verify if we have the same argument
4867  // list so assume that we don't have the same argument list.
4868  if (CB && !hasSameArgumentList(&Caller, *CB) &&
4869  needStackSlotPassParameters(Subtarget, Outs))
4870  return false;
4871  else if (!CB && needStackSlotPassParameters(Subtarget, Outs))
4872  return false;
4873 
4874  return true;
4875 }
4876 
/// IsEligibleForTailCallOptimization - Check whether the call is eligible
/// for tail call optimization. Targets which want to do tail call
/// optimization should implement this function.
/// This legacy (non-64SVR4) path only allows fastcc->fastcc tail calls
/// without byval parameters, and under PIC only to locally-bindable callees.
bool
PPCTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee,
                                                     CallingConv::ID CalleeCC,
                                                     bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                                     SelectionDAG& DAG) const {
  if (!getTargetMachine().Options.GuaranteedTailCallOpt)
    return false;

  // Variable argument functions are not supported.
  if (isVarArg)
    return false;

  MachineFunction &MF = DAG.getMachineFunction();
  CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
  if (CalleeCC == CallingConv::Fast && CallerCC == CalleeCC) {
    // Functions containing by val parameters are not supported.
    for (unsigned i = 0; i != Ins.size(); i++) {
       ISD::ArgFlagsTy Flags = Ins[i].Flags;
       if (Flags.isByVal()) return false;
    }

    // Non-PIC/GOT tail calls are supported.
    if (getTargetMachine().getRelocationModel() != Reloc::PIC_)
      return true;

    // At the moment we can only do local tail calls (in same module, hidden
    // or protected) if we are generating PIC.
    if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
      return G->getGlobal()->hasHiddenVisibility()
          || G->getGlobal()->hasProtectedVisibility();
  }

  return false;
}
4915 
/// isCallCompatibleAddress - Return the immediate to use if the specified
/// 32-bit value is representable in the immediate field of a BxA instruction.
/// Returns a pointer-typed constant node holding Addr >> 2 (the word-aligned
/// branch target), or nullptr when the value does not fit the 26-bit
/// sign-extended, 4-byte-aligned encoding.
static SDNode *isCallCompatibleAddress(SDValue Op, SelectionDAG &DAG) {
  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op);
  if (!C) return nullptr;

  int Addr = C->getZExtValue();
  if ((Addr & 3) != 0 ||  // Low 2 bits are implicitly zero.
      SignExtend32<26>(Addr) != Addr)
    return nullptr;  // Top 6 bits have to be sext of immediate.

  return DAG
      .getConstant(
          (int)C->getZExtValue() >> 2, SDLoc(Op),
          DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()))
      .getNode();
}
4933 
namespace {

/// Describes one outgoing argument of a tail call together with the frame
/// slot it must be stored into before the tail-call jump.
struct TailCallArgumentInfo {
  SDValue Arg;        // The argument value to store.
  SDValue FrameIdxOp; // Frame-index node addressing the destination slot.
  int FrameIdx = 0;   // The raw frame index of that slot.

  TailCallArgumentInfo() = default;
};

} // end anonymous namespace
4945 
/// StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
/// For each recorded tail-call argument, emits a store of the value to its
/// frame-index slot and appends the store chain to \p MemOpChains.
static void StoreTailCallArgumentsToStackSlot(
    SelectionDAG &DAG, SDValue Chain,
    const SmallVectorImpl<TailCallArgumentInfo> &TailCallArgs,
    SmallVectorImpl<SDValue> &MemOpChains, const SDLoc &dl) {
  for (unsigned i = 0, e = TailCallArgs.size(); i != e; ++i) {
    SDValue Arg = TailCallArgs[i].Arg;
    SDValue FIN = TailCallArgs[i].FrameIdxOp;
    int FI = TailCallArgs[i].FrameIdx;
    // Store relative to framepointer.
    MemOpChains.push_back(DAG.getStore(
        Chain, dl, Arg, FIN,
        MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
  }
}
4961 
/// EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to
/// the appropriate stack slot for the tail call optimized function call.
/// When SPDiff is nonzero, creates a fixed frame object at the adjusted
/// return-address offset and stores \p OldRetAddr there; returns the updated
/// chain (unchanged when SPDiff == 0).
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain,
                                             SDValue OldRetAddr, SDValue OldFP,
                                             int SPDiff, const SDLoc &dl) {
  if (SPDiff) {
    // Calculate the new stack slot for the return address.
    MachineFunction &MF = DAG.getMachineFunction();
    const PPCSubtarget &Subtarget = MF.getSubtarget<PPCSubtarget>();
    const PPCFrameLowering *FL = Subtarget.getFrameLowering();
    bool isPPC64 = Subtarget.isPPC64();
    int SlotSize = isPPC64 ? 8 : 4;
    int NewRetAddrLoc = SPDiff + FL->getReturnSaveOffset();
    int NewRetAddr = MF.getFrameInfo().CreateFixedObject(SlotSize,
                                                         NewRetAddrLoc, true);
    EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
    SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
    Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
                         MachinePointerInfo::getFixedStack(MF, NewRetAddr));
  }
  return Chain;
}
4984 
/// CalculateTailCallArgDest - Remember Argument for later processing. Calculate
/// the position of the argument.
static void
                         SDValue Arg, int SPDiff, unsigned ArgOffset,
                         SmallVectorImpl<TailCallArgumentInfo>& TailCallArguments) {
  // The argument's final position is its offset in the callee's frame,
  // adjusted by the caller/callee stack-size difference.
  int Offset = ArgOffset + SPDiff;
  // Round the argument's bit width up to whole bytes.
  uint32_t OpSize = (Arg.getValueSizeInBits() + 7) / 8;
  int FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
  EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
  SDValue FIN = DAG.getFrameIndex(FI, VT);
  // Record the argument and its destination slot; the store itself is
  // emitted later by StoreTailCallArgumentsToStackSlot.
  TailCallArgumentInfo Info;
  Info.Arg = Arg;
  Info.FrameIdxOp = FIN;
  Info.FrameIdx = FI;
  TailCallArguments.push_back(Info);
}
5002 
5003 /// EmitTCFPAndRetAddrLoad - Emit load from frame pointer and return address
5004 /// stack slot. Returns the chain as result and the loaded frame pointers in
5005 /// LROpOut/FPOpout. Used when tail calling.
5006 SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(
5007  SelectionDAG &DAG, int SPDiff, SDValue Chain, SDValue &LROpOut,
5008  SDValue &FPOpOut, const SDLoc &dl) const {
5009  if (SPDiff) {
5010  // Load the LR and FP stack slot for later adjusting.
5011  EVT VT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
5012  LROpOut = getReturnAddrFrameIndex(DAG);
5013  LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, MachinePointerInfo());
5014  Chain = SDValue(LROpOut.getNode(), 1);
5015  }
5016  return Chain;
5017 }
5018 
/// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
/// by "Src" to address "Dst" of size "Size". Alignment information is
/// specified by the specific parameter attribute. The copy will be passed as
/// a byval function parameter.
/// Sometimes what we are copying is the end of a larger object, the part that
/// does not fit in registers.
                                  SDValue Chain, ISD::ArgFlagsTy Flags,
                                  SelectionDAG &DAG, const SDLoc &dl) {
  // The number of bytes to copy comes from the byval attribute on the call.
  SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
  // Emit a memcpy honoring the byval alignment.
  return DAG.getMemcpy(Chain, dl, Dst, Src, SizeNode,
                       Flags.getNonZeroByValAlign(), false, false, false,
}
5033 
5034 /// LowerMemOpCallTo - Store the argument to the stack or remember it in case of
5035 /// tail calls.
5036 static void LowerMemOpCallTo(
5037  SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg,
5038  SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64,
5039  bool isTailCall, bool isVector, SmallVectorImpl<SDValue> &MemOpChains,
5040  SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments, const SDLoc &dl) {
5041  EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
5042  if (!isTailCall) {
5043  if (isVector) {
5044  SDValue StackPtr;
5045  if (isPPC64)
5046  StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
5047  else
5048  StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5049  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr,
5050  DAG.getConstant(ArgOffset, dl, PtrVT));
5051  }
5052  MemOpChains.push_back(
5053  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5054  // Calculate and remember argument location.
5055  } else CalculateTailCallArgDest(DAG, MF, isPPC64, Arg, SPDiff, ArgOffset,
5056  TailCallArguments);
5057 }
5058 
// Finalize a tail call: store the deferred arguments and the return address
// into their final stack slots and close the call sequence.
static void
                   const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp,
                   SDValue FPOp,
                   SmallVectorImpl<TailCallArgumentInfo> &TailCallArguments) {
  // Emit a sequence of copyto/copyfrom virtual registers for arguments that
  // might overwrite each other in case of tail call optimization.
  SmallVector<SDValue, 8> MemOpChains2;
  // Do not flag preceding copytoreg stuff together with the following stuff.
  InFlag = SDValue();
  // Store the previously recorded tail-call arguments to their final slots.
  StoreTailCallArgumentsToStackSlot(DAG, Chain, TailCallArguments,
                                    MemOpChains2, dl);
  if (!MemOpChains2.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);

  // Store the return address to the appropriate stack slot.
  Chain = EmitTailCallStoreFPAndRetAddr(DAG, Chain, LROp, FPOp, SPDiff, dl);

  // Emit callseq_end just before tailcall node.
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(0, dl, true), InFlag, dl);
  InFlag = Chain.getValue(1);
}
5082 
5083 // Is this global address that of a function that can be called by name? (as
5084 // opposed to something that must hold a descriptor for an indirect call).
5085 static bool isFunctionGlobalAddress(SDValue Callee) {
5086  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
5087  if (Callee.getOpcode() == ISD::GlobalTLSAddress ||
5088  Callee.getOpcode() == ISD::TargetGlobalTLSAddress)
5089  return false;
5090 
5091  return G->getGlobal()->getValueType()->isFunctionTy();
5092  }
5093 
5094  return false;
5095 }
5096 
/// Lower the result values of a call into the appropriate copies out of the
/// physical result registers, pushing the legalized values onto InVals.
SDValue PPCTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  CCState CCRetInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
                    *DAG.getContext());

  // The Cold calling convention on SVR4 uses its own return convention.
  CCRetInfo.AnalyzeCallResult(
      Ins, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
               ? RetCC_PPC_Cold
               : RetCC_PPC);

  // Copy all of the result registers out of their specified physreg.
  for (unsigned i = 0, e = RVLocs.size(); i != e; ++i) {
    CCValAssign &VA = RVLocs[i];
    assert(VA.isRegLoc() && "Can only return in registers!");

    SDValue Val;

    // With SPE, an f64 result is returned split across two consecutive i32
    // registers; rebuild it with BUILD_SPE64 (swapping halves on big-endian).
    if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
      SDValue Lo = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Lo.getValue(1);
      InFlag = Lo.getValue(2);
      VA = RVLocs[++i]; // skip ahead to next loc
      SDValue Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32,
                                      InFlag);
      Chain = Hi.getValue(1);
      InFlag = Hi.getValue(2);
      if (!Subtarget.isLittleEndian())
        std::swap (Lo, Hi);
      Val = DAG.getNode(PPCISD::BUILD_SPE64, dl, MVT::f64, Lo, Hi);
    } else {
      Val = DAG.getCopyFromReg(Chain, dl,
                               VA.getLocReg(), VA.getLocVT(), InFlag);
      Chain = Val.getValue(1);
      InFlag = Val.getValue(2);
    }

    // Undo any extension the calling convention applied to the value.
    switch (VA.getLocInfo()) {
    default: llvm_unreachable("Unknown loc info!");
    case CCValAssign::Full: break;
    case CCValAssign::AExt:
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::ZExt:
      Val = DAG.getNode(ISD::AssertZext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    case CCValAssign::SExt:
      Val = DAG.getNode(ISD::AssertSext, dl, VA.getLocVT(), Val,
                        DAG.getValueType(VA.getValVT()));
      Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
      break;
    }

    InVals.push_back(Val);
  }

  return Chain;
}
5160 
// Returns true if this call must be lowered as an indirect call (through a
// function pointer) rather than as a direct branch to a known symbol.
static bool isIndirectCall(const SDValue &Callee, SelectionDAG &DAG,
                           const PPCSubtarget &Subtarget, bool isPatchPoint) {
  // PatchPoint calls are not indirect.
  if (isPatchPoint)
    return false;

  // A callee that is a known global function or external symbol is direct.
  if (isFunctionGlobalAddress(Callee) || isa<ExternalSymbolSDNode>(Callee))
    return false;

  // Darwin, and 32-bit ELF can use a BLA. The descriptor based ABIs can not
  // because the immediate function pointer points to a descriptor instead of
  // a function entry point. The ELFv2 ABI cannot use a BLA because the function
  // pointer immediate points to the global entry point, while the BLA would
  // need to jump to the local entry point (see rL211174).
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI() &&
    return false;

  return true;
}
5181 
5182 // AIX and 64-bit ELF ABIs w/o PCRel require a TOC save/restore around calls.
5183 static inline bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget) {
5184  return Subtarget.isAIXABI() ||
5185  (Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls());
5186 }
5187 
                              const Function &Caller, const SDValue &Callee,
                              const PPCSubtarget &Subtarget,
                              const TargetMachine &TM,
                              bool IsStrictFPCall = false) {
  // Tail calls are always emitted with the TC_RETURN pseudo.
  if (CFlags.IsTailCall)
    return PPCISD::TC_RETURN;

  unsigned RetOpc = 0;
  // This is a call through a function pointer.
  if (CFlags.IsIndirect) {
    // AIX and the 64-bit ELF ABIs need to maintain the TOC pointer across
    // indirect calls. The save of the caller's TOC pointer to the stack will be
    // inserted into the DAG as part of call lowering. The restore of the TOC
    // pointer is modeled by using a pseudo instruction for the call opcode that
    // represents the 2 instruction sequence of an indirect branch and link,
    // immediately followed by a load of the TOC pointer from the stack save
    // slot into gpr2. For 64-bit ELFv2 ABI with PCRel, do not restore the TOC
    // as it is not saved or used.
    RetOpc = isTOCSaveRestoreRequired(Subtarget) ? PPCISD::BCTRL_LOAD_TOC
                                                 : PPCISD::BCTRL;
  } else if (Subtarget.isUsingPCRelativeCalls()) {
    assert(Subtarget.is64BitELFABI() && "PC Relative is only on ELF ABI.");
    RetOpc = PPCISD::CALL_NOTOC;
  } else if (Subtarget.isAIXABI() || Subtarget.is64BitELFABI())
    // The ABIs that maintain a TOC pointer across calls need to have a nop
    // immediately following the call instruction if the caller and callee may
    // have different TOC bases. At link time if the linker determines the calls
    // may not share a TOC base, the call is redirected to a trampoline inserted
    // by the linker. The trampoline will (among other things) save the callers
    // TOC pointer at an ABI designated offset in the linkage area and the
    // linker will rewrite the nop to be a load of the TOC pointer from the
    // linkage area into gpr2.
    RetOpc = callsShareTOCBase(&Caller, Callee, TM) ? PPCISD::CALL
                                                    : PPCISD::CALL_NOP;
  else
    RetOpc = PPCISD::CALL;
  // For strict-FP calls, map the chosen opcode to its _RM variant.
  if (IsStrictFPCall) {
    switch (RetOpc) {
    default:
      llvm_unreachable("Unknown call opcode");
      RetOpc = PPCISD::BCTRL_LOAD_TOC_RM;
      break;
    case PPCISD::BCTRL:
      RetOpc = PPCISD::BCTRL_RM;
      break;
    case PPCISD::CALL_NOTOC:
      RetOpc = PPCISD::CALL_NOTOC_RM;
      break;
    case PPCISD::CALL:
      RetOpc = PPCISD::CALL_RM;
      break;
    case PPCISD::CALL_NOP:
      RetOpc = PPCISD::CALL_NOP_RM;
      break;
    }
  }
  return RetOpc;
}
5248 
// Rewrite the callee operand into the form the selected ABI requires
// (absolute address, target global/external symbol, or AIX entry-point
// symbol), applying PLT relocation flags where appropriate.
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG,
                               const SDLoc &dl, const PPCSubtarget &Subtarget) {
  // If the target can be encoded as an absolute branch immediate, use it.
  if (!Subtarget.usesFunctionDescriptors() && !Subtarget.isELFv2ABI())
    if (SDNode *Dest = isBLACompatibleAddress(Callee, DAG))
      return SDValue(Dest, 0);

  // Returns true if the callee is local, and false otherwise.
  auto isLocalCallee = [&]() {
    const GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
    const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
    const GlobalValue *GV = G ? G->getGlobal() : nullptr;

    // ifuncs are resolved at runtime, so never treat them as local here.
    return DAG.getTarget().shouldAssumeDSOLocal(*Mod, GV) &&
           !isa_and_nonnull<GlobalIFunc>(GV);
  };

  // The PLT is only used in 32-bit ELF PIC mode. Attempting to use the PLT in
  // a static relocation model causes some versions of GNU LD (2.17.50, at
  // least) to force BSS-PLT, instead of secure-PLT, even if all objects are
  // built with secure-PLT.
  bool UsePlt =
      Subtarget.is32BitELFABI() && !isLocalCallee() &&

  // On AIX, calls target the function's entry-point symbol rather than the
  // function symbol itself.
  const auto getAIXFuncEntryPointSymbolSDNode = [&](const GlobalValue *GV) {
    const TargetMachine &TM = Subtarget.getTargetMachine();
    const TargetLoweringObjectFile *TLOF = TM.getObjFileLowering();
    MCSymbolXCOFF *S =
        cast<MCSymbolXCOFF>(TLOF->getFunctionEntryPointSymbol(GV, TM));

    MVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
    return DAG.getMCSymbol(S, PtrVT);
  };

    const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();

    if (Subtarget.isAIXABI()) {
      assert(!isa<GlobalIFunc>(GV) && "IFunc is not supported on AIX.");
      return getAIXFuncEntryPointSymbolSDNode(GV);
    }
    return DAG.getTargetGlobalAddress(GV, dl, Callee.getValueType(), 0,
                                      UsePlt ? PPCII::MO_PLT : 0);
  }

  if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    const char *SymName = S->getSymbol();
    if (Subtarget.isAIXABI()) {
      // If there exists a user-declared function whose name is the same as the
      // ExternalSymbol's, then we pick up the user-declared version.
      const Module *Mod = DAG.getMachineFunction().getFunction().getParent();
      if (const Function *F =
              dyn_cast_or_null<Function>(Mod->getNamedValue(SymName)))
        return getAIXFuncEntryPointSymbolSDNode(F);

      // On AIX, direct function calls reference the symbol for the function's
      // entry point, which is named by prepending a "." before the function's
      // C-linkage name. A Qualname is returned here because an external
      // function entry point is a csect with XTY_ER property.
      const auto getExternalFunctionEntryPointSymbol = [&](StringRef SymName) {
        auto &Context = DAG.getMachineFunction().getMMI().getContext();
        MCSectionXCOFF *Sec = Context.getXCOFFSection(
            (Twine(".") + Twine(SymName)).str(), SectionKind::getMetadata(),
        return Sec->getQualNameSymbol();
      };

      SymName = getExternalFunctionEntryPointSymbol(SymName)->getName().data();
    }
    return DAG.getTargetExternalSymbol(SymName, Callee.getValueType(),
                                       UsePlt ? PPCII::MO_PLT : 0);
  }

  // No transformation needed.
  assert(Callee.getNode() && "What no callee?");
  return Callee;
}
5326 
  // Extract the chain result produced by a CALLSEQ_START node.
  assert(CallSeqStart.getOpcode() == ISD::CALLSEQ_START &&
         "Expected a CALLSEQ_STARTSDNode.");

  // The last operand is the chain, except when the node has glue. If the node
  // has glue, then the last operand is the glue, and the chain is the second
  // last operand.
  SDValue LastValue = CallSeqStart.getValue(CallSeqStart->getNumValues() - 1);
  if (LastValue.getValueType() != MVT::Glue)
    return LastValue;

  return CallSeqStart.getValue(CallSeqStart->getNumValues() - 2);
}
5340 
5341 // Creates the node that moves a functions address into the count register
5342 // to prepare for an indirect call instruction.
5343 static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee,
5344  SDValue &Glue, SDValue &Chain,
5345  const SDLoc &dl) {
5346  SDValue MTCTROps[] = {Chain, Callee, Glue};
5347  EVT ReturnTypes[] = {MVT::Other, MVT::Glue};
5348  Chain = DAG.getNode(PPCISD::MTCTR, dl, makeArrayRef(ReturnTypes, 2),
5349  makeArrayRef(MTCTROps, Glue.getNode() ? 3 : 2));
5350  // The glue is the second value produced.
5351  Glue = Chain.getValue(1);
5352 }
5353 
                                          SDValue &Glue, SDValue &Chain,
                                          SDValue CallSeqStart,
                                          const CallBase *CB, const SDLoc &dl,
                                          bool hasNest,
                                          const PPCSubtarget &Subtarget) {
  // Function pointers in the 64-bit SVR4 ABI do not point to the function
  // entry point, but to the function descriptor (the function entry point
  // address is part of the function descriptor though).
  // The function descriptor is a three doubleword structure with the
  // following fields: function entry point, TOC base address and
  // environment pointer.
  // Thus for a call through a function pointer, the following actions need
  // to be performed:
  //   1. Save the TOC of the caller in the TOC save area of its stack
  //      frame (this is done in LowerCall_Darwin() or LowerCall_64SVR4()).
  //   2. Load the address of the function entry point from the function
  //      descriptor.
  //   3. Load the TOC of the callee from the function descriptor into r2.
  //   4. Load the environment pointer from the function descriptor into
  //      r11.
  //   5. Branch to the function entry point address.
  //   6. On return of the callee, the TOC of the caller needs to be
  //      restored (this is done in FinishCall()).
  //
  // The loads are scheduled at the beginning of the call sequence, and the
  // register copies are flagged together to ensure that no other
  // operations can be scheduled in between. E.g. without flagging the
  // copies together, a TOC access in the caller could be scheduled between
  // the assignment of the callee TOC and the branch to the callee, which leads
  // to incorrect code.

  // Start by loading the function address from the descriptor.
  SDValue LDChain = getOutputChainFromCallSeq(CallSeqStart);
  auto MMOFlags = Subtarget.hasInvariantFunctionDescriptors()

  // Alias information for the descriptor loads, based on the called operand.
  MachinePointerInfo MPI(CB ? CB->getCalledOperand() : nullptr);

  // Registers used in building the DAG.
  const MCRegister EnvPtrReg = Subtarget.getEnvironmentPointerRegister();
  const MCRegister TOCReg = Subtarget.getTOCPointerRegister();

  // Offsets of descriptor members.
  const unsigned TOCAnchorOffset = Subtarget.descriptorTOCAnchorOffset();
  const unsigned EnvPtrOffset = Subtarget.descriptorEnvironmentPointerOffset();

  const MVT RegVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
  const unsigned Alignment = Subtarget.isPPC64() ? 8 : 4;

  // One load for the functions entry point address.
  SDValue LoadFuncPtr = DAG.getLoad(RegVT, dl, LDChain, Callee, MPI,
                                    Alignment, MMOFlags);

  // One for loading the TOC anchor for the module that contains the called
  // function.
  SDValue TOCOff = DAG.getIntPtrConstant(TOCAnchorOffset, dl);
  SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, Callee, TOCOff);
  SDValue TOCPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddTOC,
                  MPI.getWithOffset(TOCAnchorOffset), Alignment, MMOFlags);

  // One for loading the environment pointer.
  SDValue PtrOff = DAG.getIntPtrConstant(EnvPtrOffset, dl);
  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, RegVT, Callee, PtrOff);
  SDValue LoadEnvPtr =
      DAG.getLoad(RegVT, dl, LDChain, AddPtr,
                  MPI.getWithOffset(EnvPtrOffset), Alignment, MMOFlags);


  // Then copy the newly loaded TOC anchor to the TOC pointer.
  SDValue TOCVal = DAG.getCopyToReg(Chain, dl, TOCReg, TOCPtr, Glue);
  Chain = TOCVal.getValue(0);
  Glue = TOCVal.getValue(1);

  // If the function call has an explicit 'nest' parameter, it takes the
  // place of the environment pointer.
  assert((!hasNest || !Subtarget.isAIXABI()) &&
         "Nest parameter is not supported on AIX.");
  if (!hasNest) {
    SDValue EnvVal = DAG.getCopyToReg(Chain, dl, EnvPtrReg, LoadEnvPtr, Glue);
    Chain = EnvVal.getValue(0);
    Glue = EnvVal.getValue(1);
  }

  // The rest of the indirect call sequence is the same as the non-descriptor
  // DAG.
  prepareIndirectCall(DAG, LoadFuncPtr, Glue, Chain, dl);
}
5445 
// Assemble the full operand list for the call node: chain, callee (or TOC
// restore / CTR operands for indirect calls), argument registers, implicit
// register uses, the call-preserved register mask, and the trailing glue.
static void
                  PPCTargetLowering::CallFlags CFlags, const SDLoc &dl,
                  SelectionDAG &DAG,
                  SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass,
                  SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff,
                  const PPCSubtarget &Subtarget) {
  const bool IsPPC64 = Subtarget.isPPC64();
  // MVT for a general purpose register.
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

  // First operand is always the chain.
  Ops.push_back(Chain);

  // If it's a direct call pass the callee as the second operand.
  if (!CFlags.IsIndirect)
    Ops.push_back(Callee);
  else {
    assert(!CFlags.IsPatchPoint && "Patch point calls are not indirect.");

    // For the TOC based ABIs, we have saved the TOC pointer to the linkage area
    // on the stack (this would have been done in `LowerCall_64SVR4` or
    // `LowerCall_AIX`). The call instruction is a pseudo instruction that
    // represents both the indirect branch and a load that restores the TOC
    // pointer from the linkage area. The operand for the TOC restore is an add
    // of the TOC save offset to the stack pointer. This must be the second
    // operand: after the chain input but before any other variadic arguments.
    // For 64-bit ELFv2 ABI with PCRel, do not restore the TOC as it is not
    // saved or used.
    if (isTOCSaveRestoreRequired(Subtarget)) {
      const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();

      SDValue StackPtr = DAG.getRegister(StackPtrReg, RegVT);
      unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
      SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
      SDValue AddTOC = DAG.getNode(ISD::ADD, dl, RegVT, StackPtr, TOCOff);
      Ops.push_back(AddTOC);
    }

    // Add the register used for the environment pointer.
    if (Subtarget.usesFunctionDescriptors() && !CFlags.HasNest)
                                    RegVT));


    // Add CTR register as callee so a bctr can be emitted later.
    if (CFlags.IsTailCall)
      Ops.push_back(DAG.getRegister(IsPPC64 ? PPC::CTR8 : PPC::CTR, RegVT));
  }

  // If this is a tail call add stack pointer delta.
  if (CFlags.IsTailCall)
    Ops.push_back(DAG.getConstant(SPDiff, dl, MVT::i32));

  // Add argument registers to the end of the list so that they are known live
  // into the call.
  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                  RegsToPass[i].second.getValueType()));

  // We cannot add R2/X2 as an operand here for PATCHPOINT, because there is
  // no way to mark dependencies as implicit here.
  // We will add the R2/X2 dependency in EmitInstrWithCustomInserter.
  if ((Subtarget.is64BitELFABI() || Subtarget.isAIXABI()) &&
      !CFlags.IsPatchPoint && !Subtarget.isUsingPCRelativeCalls())
    Ops.push_back(DAG.getRegister(Subtarget.getTOCPointerRegister(), RegVT));

  // Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
  if (CFlags.IsVarArg && Subtarget.is32BitELFABI())
    Ops.push_back(DAG.getRegister(PPC::CR1EQ, MVT::i32));

  // Add a register mask operand representing the call-preserved registers.
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
  const uint32_t *Mask =
      TRI->getCallPreservedMask(DAG.getMachineFunction(), CFlags.CallConv);
  assert(Mask && "Missing call preserved mask for calling convention");
  Ops.push_back(DAG.getRegisterMask(Mask));

  // If the glue is valid, it is the last operand.
  if (Glue.getNode())
    Ops.push_back(Glue);
}
5528 
/// Complete call lowering: pick the call opcode, canonicalize the callee
/// (or emit the indirect-call sequence), build the operand list, emit the
/// call (or TC_RETURN) node, close the call sequence, and lower the results.
SDValue PPCTargetLowering::FinishCall(
    CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG,
    SmallVector<std::pair<unsigned, SDValue>, 8> &RegsToPass, SDValue Glue,
    SDValue Chain, SDValue CallSeqStart, SDValue &Callee, int SPDiff,
    unsigned NumBytes, const SmallVectorImpl<ISD::InputArg> &Ins,
    SmallVectorImpl<SDValue> &InVals, const CallBase *CB) const {

  // TOC-based ABIs (except ELFv2 with PCRel) reference the TOC base pointer.
  if ((Subtarget.is64BitELFABI() && !Subtarget.isUsingPCRelativeCalls()) ||
      Subtarget.isAIXABI())
    setUsesTOCBasePtr(DAG);

  unsigned CallOpc =
      getCallOpcode(CFlags, DAG.getMachineFunction().getFunction(), Callee,
                    Subtarget, DAG.getTarget(), CB ? CB->isStrictFP() : false);

  // Direct calls only need the callee operand rewritten; indirect calls emit
  // the (descriptor-based or plain) MTCTR sequence.
  if (!CFlags.IsIndirect)
    Callee = transformCallee(Callee, DAG, dl, Subtarget);
  else if (Subtarget.usesFunctionDescriptors())
    prepareDescriptorIndirectCall(DAG, Callee, Glue, Chain, CallSeqStart, CB,
                                  dl, CFlags.HasNest, Subtarget);
  else
    prepareIndirectCall(DAG, Callee, Glue, Chain, dl);

  // Build the operand list for the call instruction.
  buildCallOperands(Ops, CFlags, dl, DAG, RegsToPass, Glue, Chain, Callee,
                    SPDiff, Subtarget);

  // Emit tail call.
  if (CFlags.IsTailCall) {
    // Indirect tail call when using PC Relative calls do not have the same
    // constraints.
    assert(((Callee.getOpcode() == ISD::Register &&
             cast<RegisterSDNode>(Callee)->getReg() == PPC::CTR) ||
            Callee.getOpcode() == ISD::TargetExternalSymbol ||
            Callee.getOpcode() == ISD::TargetGlobalAddress ||
            isa<ConstantSDNode>(Callee) ||
            (CFlags.IsIndirect && Subtarget.isUsingPCRelativeCalls())) &&
           "Expecting a global address, external symbol, absolute value, "
           "register or an indirect tail call when PC Relative calls are "
           "used.");
    // PC Relative calls also use TC_RETURN as the way to mark tail calls.
    assert(CallOpc == PPCISD::TC_RETURN &&
           "Unexpected call opcode for a tail call.");
    return DAG.getNode(CallOpc, dl, MVT::Other, Ops);
  }

  std::array<EVT, 2> ReturnTypes = {{MVT::Other, MVT::Glue}};
  Chain = DAG.getNode(CallOpc, dl, ReturnTypes, Ops);
  DAG.addNoMergeSiteInfo(Chain.getNode(), CFlags.NoMerge);
  Glue = Chain.getValue(1);

  // When performing tail call optimization the callee pops its arguments off
  // the stack. Account for this here so these bytes can be pushed back on in
  // PPCFrameLowering::eliminateCallFramePseudoInstr.
  int BytesCalleePops = (CFlags.CallConv == CallingConv::Fast &&
                             ? NumBytes
                             : 0;

  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
                             DAG.getIntPtrConstant(BytesCalleePops, dl, true),
                             Glue, dl);
  Glue = Chain.getValue(1);

  return LowerCallResult(Chain, Glue, CFlags.CallConv, CFlags.IsVarArg, Ins, dl,
                         DAG, InVals);
}
5598 
/// Top-level call lowering entry point: decides tail-call eligibility, builds
/// the CallFlags, and dispatches to the ABI-specific LowerCall_* routine.
SDValue
PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                             SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  SDLoc &dl = CLI.DL;
  SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &isTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool isVarArg = CLI.IsVarArg;
  bool isPatchPoint = CLI.IsPatchPoint;
  const CallBase *CB = CLI.CB;

  if (isTailCall) {
    // Long calls force indirection, which defeats tail calling (except for
    // musttail, which must be honored).
    if (Subtarget.useLongCalls() && !(CB && CB->isMustTailCall()))
      isTailCall = false;
    else if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
      isTailCall = IsEligibleForTailCallOptimization_64SVR4(
          Callee, CallConv, CB, isVarArg, Outs, Ins, DAG);
    else
      isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
                                                     Ins, DAG);
    if (isTailCall) {
      ++NumTailCalls;
      if (!getTargetMachine().Options.GuaranteedTailCallOpt)
        ++NumSiblingCalls;

      // PC Relative calls no longer guarantee that the callee is a Global
      // Address Node. The callee could be an indirect tail call in which
      // case the SDValue for the callee could be a load (to load the address
      // of a function pointer) or it may be a register copy (to move the
      // address of the callee from a function parameter into a virtual
      // register). It may also be an ExternalSymbolSDNode (ex memcopy).
      assert((Subtarget.isUsingPCRelativeCalls() ||
              isa<GlobalAddressSDNode>(Callee)) &&
             "Callee should be an llvm::Function object.");

      LLVM_DEBUG(dbgs() << "TCO caller: " << DAG.getMachineFunction().getName()
                        << "\nTCO callee: ");
      LLVM_DEBUG(Callee.dump());
    }
  }

  if (!isTailCall && CB && CB->isMustTailCall())
    report_fatal_error("failed to perform tail call elimination on a call "
                       "site marked musttail");

  // When long calls (i.e. indirect calls) are always used, calls are always
  // made via function pointer. If we have a function name, first translate it
  // into a pointer.
  if (Subtarget.useLongCalls() && isa<GlobalAddressSDNode>(Callee) &&
      !isTailCall)
    Callee = LowerGlobalAddress(Callee, DAG);

  CallFlags CFlags(
      CallConv, isTailCall, isVarArg, isPatchPoint,
      isIndirectCall(Callee, DAG, Subtarget, isPatchPoint),
      // hasNest
      Subtarget.is64BitELFABI() &&
          any_of(Outs, [](ISD::OutputArg Arg) { return Arg.Flags.isNest(); }),
      CLI.NoMerge);

  // Dispatch to the ABI-specific lowering routine.
  if (Subtarget.isAIXABI())
    return LowerCall_AIX(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                         InVals, CB);

  assert(Subtarget.isSVR4ABI());
  if (Subtarget.isPPC64())
    return LowerCall_64SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                            InVals, CB);
  return LowerCall_32SVR4(Chain, Callee, CFlags, Outs, OutVals, Ins, dl, DAG,
                          InVals, CB);
}
5675 
// Lower an outgoing call according to the 32-bit SVR4 (ELF) ABI: assign each
// outgoing argument to a register or stack slot, copy byval aggregates into
// the caller's local area outside the CALLSEQ bracket, set CR bit 6 for
// varargs calls that pass floats in registers, and emit the call sequence.
5676 SDValue PPCTargetLowering::LowerCall_32SVR4(
5677  SDValue Chain, SDValue Callee, CallFlags CFlags,
5678  const SmallVectorImpl<ISD::OutputArg> &Outs,
5679  const SmallVectorImpl<SDValue> &OutVals,
5680  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5681  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5682  const CallBase *CB) const {
5683  // See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
5684  // of the 32-bit SVR4 ABI stack frame layout.
5685 
5686  const CallingConv::ID CallConv = CFlags.CallConv;
5687  const bool IsVarArg = CFlags.IsVarArg;
5688  const bool IsTailCall = CFlags.IsTailCall;
5689 
5690  assert((CallConv == CallingConv::C ||
5691  CallConv == CallingConv::Cold ||
5692  CallConv == CallingConv::Fast) && "Unknown calling convention!");
5693 
  // 32-bit ABI: pointers (and hence stack slots) are 4-byte aligned.
5694  const Align PtrAlign(4);
5695 
5696  MachineFunction &MF = DAG.getMachineFunction();
5697 
5698  // Mark this function as potentially containing a function that contains a
5699  // tail call. As a consequence the frame pointer will be used for dynamicalloc
5700  // and restoring the callers stack pointer in this functions epilog. This is
5701  // done because by tail calling the called function might overwrite the value
5702  // in this function's (MF) stack pointer stack slot 0(SP).
5703  if (getTargetMachine().Options.GuaranteedTailCallOpt &&
5704  CallConv == CallingConv::Fast)
5705  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5706 
5707  // Count how many bytes are to be pushed on the stack, including the linkage
5708  // area, parameter list area and the part of the local variable space which
5709  // contains copies of aggregates which are passed by value.
5710 
5711  // Assign locations to all of the outgoing arguments.
5713  PPCCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
5714 
5715  // Reserve space for the linkage area on the stack.
5716  CCInfo.AllocateStack(Subtarget.getFrameLowering()->getLinkageSize(),
5717  PtrAlign);
  // NOTE(review): with soft-float, pre-scan the operands first — presumably to
  // record which ones were originally ppcf128 so CC_PPC32_SVR4 can place them
  // correctly (state cleared again via clearWasPPCF128 below); confirm against
  // PPCCCState.
5718  if (useSoftFloat())
5719  CCInfo.PreAnalyzeCallOperands(Outs);
5720 
5721  if (IsVarArg) {
5722  // Handle fixed and variable vector arguments differently.
5723  // Fixed vector arguments go into registers as long as registers are
5724  // available. Variable vector arguments always go into memory.
5725  unsigned NumArgs = Outs.size();
5726 
5727  for (unsigned i = 0; i != NumArgs; ++i) {
5728  MVT ArgVT = Outs[i].VT;
5729  ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
5730  bool Result;
5731 
5732  if (Outs[i].IsFixed) {
5733  Result = CC_PPC32_SVR4(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags,
5734  CCInfo);
5735  } else {
5736  Result = CC_PPC32_SVR4_VarArg(i, ArgVT, ArgVT, CCValAssign::Full,
5737  ArgFlags, CCInfo);
5738  }
5739 
  // A non-zero Result means the calling-convention function could not
  // assign this operand — that is a fatal lowering error.
5740  if (Result) {
5741 #ifndef NDEBUG
5742  errs() << "Call operand #" << i << " has unhandled type "
5743  << EVT(ArgVT).getEVTString() << "\n";
5744 #endif
5745  llvm_unreachable(nullptr);
5746  }
5747  }
5748  } else {
5749  // All arguments are treated the same.
5750  CCInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4);
5751  }
5752  CCInfo.clearWasPPCF128();
5753 
5754  // Assign locations to all of the outgoing aggregate by value arguments.
5755  SmallVector<CCValAssign, 16> ByValArgLocs;
5756  CCState CCByValInfo(CallConv, IsVarArg, MF, ByValArgLocs, *DAG.getContext());
5757 
5758  // Reserve stack space for the allocations in CCInfo.
5759  CCByValInfo.AllocateStack(CCInfo.getNextStackOffset(), PtrAlign);
5760 
5761  CCByValInfo.AnalyzeCallOperands(Outs, CC_PPC32_SVR4_ByVal);
5762 
5763  // Size of the linkage area, parameter list area and the part of the local
5764  // space variable where copies of aggregates which are passed by value are
5765  // stored.
5766  unsigned NumBytes = CCByValInfo.getNextStackOffset();
5767 
5768  // Calculate by how many bytes the stack has to be adjusted in case of tail
5769  // call optimization.
5770  int SPDiff = CalculateTailCallSPDiff(DAG, IsTailCall, NumBytes);
5771 
5772  // Adjust the stack pointer for the new arguments...
5773  // These operations are automatically eliminated by the prolog/epilog pass
5774  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
5775  SDValue CallSeqStart = Chain;
5776 
5777  // Load the return address and frame pointer so it can be moved somewhere else
5778  // later.
5779  SDValue LROp, FPOp;
5780  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
5781 
5782  // Set up a copy of the stack pointer for use loading and storing any
5783  // arguments that may not fit in the registers available for argument
5784  // passing.
5785  SDValue StackPtr = DAG.getRegister(PPC::R1, MVT::i32);
5786 
5788  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
5789  SmallVector<SDValue, 8> MemOpChains;
5790 
5791  bool seenFloatArg = false;
5792  // Walk the register/memloc assignments, inserting copies/loads.
5793  // i - Tracks the index into the list of registers allocated for the call
5794  // RealArgIdx - Tracks the index into the list of actual function arguments
5795  // j - Tracks the index into the list of byval arguments
5796  for (unsigned i = 0, RealArgIdx = 0, j = 0, e = ArgLocs.size();
5797  i != e;
5798  ++i, ++RealArgIdx) {
5799  CCValAssign &VA = ArgLocs[i];
5800  SDValue Arg = OutVals[RealArgIdx];
5801  ISD::ArgFlagsTy Flags = Outs[RealArgIdx].Flags;
5802 
5803  if (Flags.isByVal()) {
5804  // Argument is an aggregate which is passed by value, thus we need to
5805  // create a copy of it in the local variable space of the current stack
5806  // frame (which is the stack frame of the caller) and pass the address of
5807  // this copy to the callee.
5808  assert((j < ByValArgLocs.size()) && "Index out of bounds!");
5809  CCValAssign &ByValVA = ByValArgLocs[j++];
5810  assert((VA.getValNo() == ByValVA.getValNo()) && "ValNo mismatch!");
5811 
5812  // Memory reserved in the local variable space of the callers stack frame.
5813  unsigned LocMemOffset = ByValVA.getLocMemOffset();
5814 
5815  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5816  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5817  StackPtr, PtrOff);
5818 
5819  // Create a copy of the argument in the local area of the current
5820  // stack frame.
5821  SDValue MemcpyCall =
5823  CallSeqStart.getNode()->getOperand(0),
5824  Flags, DAG, dl);
5825 
5826  // This must go outside the CALLSEQ_START..END.
5827  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, NumBytes, 0,
5828  SDLoc(MemcpyCall));
5829  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5830  NewCallSeqStart.getNode());
5831  Chain = CallSeqStart = NewCallSeqStart;
5832 
5833  // Pass the address of the aggregate copy on the stack either in a
5834  // physical register or in the parameter list area of the current stack
5835  // frame to the callee.
5836  Arg = PtrOff;
5837  }
5838 
5839  // When useCRBits() is true, there can be i1 arguments.
5840  // It is because getRegisterType(MVT::i1) => MVT::i1,
5841  // and for other integer types getRegisterType() => MVT::i32.
5842  // Extend i1 and ensure callee will get i32.
5843  if (Arg.getValueType() == MVT::i1)
5845  dl, MVT::i32, Arg);
5846 
5847  if (VA.isRegLoc()) {
5848  seenFloatArg |= VA.getLocVT().isFloatingPoint();
5849  // Put argument in a physical register.
  // With SPE, an f64 occupies a 64-bit SPE register; split it into two
  // i32 halves (order depends on endianness) and pass them in the two
  // GPRs the calling convention assigned. The second half consumes the
  // next ArgLocs entry (note the ++i).
5850  if (Subtarget.hasSPE() && Arg.getValueType() == MVT::f64) {
5851  bool IsLE = Subtarget.isLittleEndian();
5852  SDValue SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5853  DAG.getIntPtrConstant(IsLE ? 0 : 1, dl));
5854  RegsToPass.push_back(std::make_pair(VA.getLocReg(), SVal.getValue(0)));
5855  SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
5856  DAG.getIntPtrConstant(IsLE ? 1 : 0, dl));
5857  RegsToPass.push_back(std::make_pair(ArgLocs[++i].getLocReg(),
5858  SVal.getValue(0)));
5859  } else
5860  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
5861  } else {
5862  // Put argument in the parameter list area of the current stack frame.
5863  assert(VA.isMemLoc());
5864  unsigned LocMemOffset = VA.getLocMemOffset();
5865 
5866  if (!IsTailCall) {
5867  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
5868  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(MF.getDataLayout()),
5869  StackPtr, PtrOff);
5870 
5871  MemOpChains.push_back(
5872  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
5873  } else {
5874  // Calculate and remember argument location.
5875  CalculateTailCallArgDest(DAG, MF, false, Arg, SPDiff, LocMemOffset,
5876  TailCallArguments);
5877  }
5878  }
5879  }
5880 
5881  if (!MemOpChains.empty())
5882  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
5883 
5884  // Build a sequence of copy-to-reg nodes chained together with token chain
5885  // and flag operands which copy the outgoing args into the appropriate regs.
5886  SDValue InFlag;
5887  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
5888  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
5889  RegsToPass[i].second, InFlag);
5890  InFlag = Chain.getValue(1);
5891  }
5892 
5893  // Set CR bit 6 to true if this is a vararg call with floating args passed in
5894  // registers.
5895  if (IsVarArg) {
5896  SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
5897  SDValue Ops[] = { Chain, InFlag };
5898 
5899  Chain = DAG.getNode(seenFloatArg ? PPCISD::CR6SET : PPCISD::CR6UNSET,
5900  dl, VTs, makeArrayRef(Ops, InFlag.getNode() ? 2 : 1));
5901 
5902  InFlag = Chain.getValue(1);
5903  }
5904 
5905  if (IsTailCall)
5906  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
5907  TailCallArguments);
5908 
5909  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
5910  Callee, SPDiff, NumBytes, Ins, InVals, CB);
5911 }
5912 
5913 // Copy an argument into memory, being careful to do this outside the
5914 // call sequence for the call to which the argument belongs.
5915 SDValue PPCTargetLowering::createMemcpyOutsideCallSeq(
5916  SDValue Arg, SDValue PtrOff, SDValue CallSeqStart, ISD::ArgFlagsTy Flags,
5917  SelectionDAG &DAG, const SDLoc &dl) const {
5918  SDValue MemcpyCall = CreateCopyOfByValArgument(Arg, PtrOff,
5919  CallSeqStart.getNode()->getOperand(0),
5920  Flags, DAG, dl);
5921  // The MEMCPY must go outside the CALLSEQ_START..END.
5922  int64_t FrameSize = CallSeqStart.getConstantOperandVal(1);
5923  SDValue NewCallSeqStart = DAG.getCALLSEQ_START(MemcpyCall, FrameSize, 0,
5924  SDLoc(MemcpyCall));
5925  DAG.ReplaceAllUsesWith(CallSeqStart.getNode(),
5926  NewCallSeqStart.getNode());
5927  return NewCallSeqStart;
5928 }
5929 
// Lower an outgoing call according to the 64-bit SVR4 (ELFv1/ELFv2) ABI:
// size the parameter save area (eliding it on ELFv2 when everything fits in
// registers), place each argument into GPRs/FPRs/VRs or the save area, handle
// byval aggregates and homogeneous float/vector aggregates, store the TOC for
// indirect calls, and emit the call sequence.
5930 SDValue PPCTargetLowering::LowerCall_64SVR4(
5931  SDValue Chain, SDValue Callee, CallFlags CFlags,
5932  const SmallVectorImpl<ISD::OutputArg> &Outs,
5933  const SmallVectorImpl<SDValue> &OutVals,
5934  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
5935  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
5936  const CallBase *CB) const {
5937  bool isELFv2ABI = Subtarget.isELFv2ABI();
5938  bool isLittleEndian = Subtarget.isLittleEndian();
5939  unsigned NumOps = Outs.size();
5940  bool IsSibCall = false;
5941  bool IsFastCall = CFlags.CallConv == CallingConv::Fast;
5942 
5943  EVT PtrVT = getPointerTy(DAG.getDataLayout());
5944  unsigned PtrByteSize = 8;
5945 
5946  MachineFunction &MF = DAG.getMachineFunction();
5947 
  // Without GuaranteedTailCallOpt, tail calls are emitted as sibling calls:
  // no stack adjustment (SPDiff stays 0) and no CALLSEQ_START here.
5948  if (CFlags.IsTailCall && !getTargetMachine().Options.GuaranteedTailCallOpt)
5949  IsSibCall = true;
5950 
5951  // Mark this function as potentially containing a function that contains a
5952  // tail call. As a consequence the frame pointer will be used for dynamicalloc
5953  // and restoring the callers stack pointer in this functions epilog. This is
5954  // done because by tail calling the called function might overwrite the value
5955  // in this function's (MF) stack pointer stack slot 0(SP).
5956  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
5957  MF.getInfo<PPCFunctionInfo>()->setHasFastCall();
5958 
5959  assert(!(IsFastCall && CFlags.IsVarArg) &&
5960  "fastcc not supported on varargs functions");
5961 
5962  // Count how many bytes are to be pushed on the stack, including the linkage
5963  // area, and parameter passing area. On ELFv1, the linkage area is 48 bytes
5964  // reserved space for [SP][CR][LR][2 x unused][TOC]; on ELFv2, the linkage
5965  // area is 32 bytes reserved space for [SP][CR][LR][TOC].
5966  unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
5967  unsigned NumBytes = LinkageSize;
5968  unsigned GPR_idx = 0, FPR_idx = 0, VR_idx = 0;
5969 
5970  static const MCPhysReg GPR[] = {
5971  PPC::X3, PPC::X4, PPC::X5, PPC::X6,
5972  PPC::X7, PPC::X8, PPC::X9, PPC::X10,
5973  };
5974  static const MCPhysReg VR[] = {
5975  PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8,
5976  PPC::V9, PPC::V10, PPC::V11, PPC::V12, PPC::V13
5977  };
5978 
5979  const unsigned NumGPRs = array_lengthof(GPR);
5980  const unsigned NumFPRs = useSoftFloat() ? 0 : 13;
5981  const unsigned NumVRs = array_lengthof(VR);
5982 
5983  // On ELFv2, we can avoid allocating the parameter area if all the arguments
5984  // can be passed to the callee in registers.
5985  // For the fast calling convention, there is another check below.
5986  // Note: We should keep consistent with LowerFormalArguments_64SVR4()
5987  bool HasParameterArea = !isELFv2ABI || CFlags.IsVarArg || IsFastCall;
5988  if (!HasParameterArea) {
5989  unsigned ParamAreaSize = NumGPRs * PtrByteSize;
5990  unsigned AvailableFPRs = NumFPRs;
5991  unsigned AvailableVRs = NumVRs;
5992  unsigned NumBytesTmp = NumBytes;
5993  for (unsigned i = 0; i != NumOps; ++i) {
5994  if (Outs[i].Flags.isNest()) continue;
5995  if (CalculateStackSlotUsed(Outs[i].VT, Outs[i].ArgVT, Outs[i].Flags,
5996  PtrByteSize, LinkageSize, ParamAreaSize,
5997  NumBytesTmp, AvailableFPRs, AvailableVRs))
5998  HasParameterArea = true;
5999  }
6000  }
6001 
6002  // When using the fast calling convention, we don't provide backing for
6003  // arguments that will be in registers.
6004  unsigned NumGPRsUsed = 0, NumFPRsUsed = 0, NumVRsUsed = 0;
6005 
6006  // Avoid allocating parameter area for fastcc functions if all the arguments
6007  // can be passed in the registers.
6008  if (IsFastCall)
6009  HasParameterArea = false;
6010 
6011  // Add up all the space actually used.
6012  for (unsigned i = 0; i != NumOps; ++i) {
6013  ISD::ArgFlagsTy Flags = Outs[i].Flags;
6014  EVT ArgVT = Outs[i].VT;
6015  EVT OrigVT = Outs[i].ArgVT;
6016 
6017  if (Flags.isNest())
6018  continue;
6019 
  // For fastcc, only arguments that overflow the registers contribute to
  // the parameter area; everything else is skipped via 'continue' below.
6020  if (IsFastCall) {
6021  if (Flags.isByVal()) {
6022  NumGPRsUsed += (Flags.getByValSize()+7)/8;
6023  if (NumGPRsUsed > NumGPRs)
6024  HasParameterArea = true;
6025  } else {
6026  switch (ArgVT.getSimpleVT().SimpleTy) {
6027  default: llvm_unreachable("Unexpected ValueType for argument!");
6028  case MVT::i1:
6029  case MVT::i32:
6030  case MVT::i64:
6031  if (++NumGPRsUsed <= NumGPRs)
6032  continue;
6033  break;
6034  case MVT::v4i32:
6035  case MVT::v8i16:
6036  case MVT::v16i8:
6037  case MVT::v2f64:
6038  case MVT::v2i64:
6039  case MVT::v1i128:
6040  case MVT::f128:
6041  if (++NumVRsUsed <= NumVRs)
6042  continue;
6043  break;
6044  case MVT::v4f32:
6045  if (++NumVRsUsed <= NumVRs)
6046  continue;
6047  break;
6048  case MVT::f32:
6049  case MVT::f64:
6050  if (++NumFPRsUsed <= NumFPRs)
6051  continue;
6052  break;
6053  }
6054  HasParameterArea = true;
6055  }
6056  }
6057 
6058  /* Respect alignment of argument on the stack. */
6059  auto Alignement =
6060  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6061  NumBytes = alignTo(NumBytes, Alignement);
6062 
6063  NumBytes += CalculateStackSlotSize(ArgVT, Flags, PtrByteSize);
6064  if (Flags.isInConsecutiveRegsLast())
6065  NumBytes = ((NumBytes + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6066  }
6067 
6068  unsigned NumBytesActuallyUsed = NumBytes;
6069 
6070  // In the old ELFv1 ABI,
6071  // the prolog code of the callee may store up to 8 GPR argument registers to
6072  // the stack, allowing va_start to index over them in memory if its varargs.
6073  // Because we cannot tell if this is needed on the caller side, we have to
6074  // conservatively assume that it is needed. As such, make sure we have at
6075  // least enough stack space for the caller to store the 8 GPRs.
6076  // In the ELFv2 ABI, we allocate the parameter area iff a callee
6077  // really requires memory operands, e.g. a vararg function.
6078  if (HasParameterArea)
6079  NumBytes = std::max(NumBytes, LinkageSize + 8 * PtrByteSize);
6080  else
6081  NumBytes = LinkageSize;
6082 
6083  // Tail call needs the stack to be aligned.
6084  if (getTargetMachine().Options.GuaranteedTailCallOpt && IsFastCall)
6085  NumBytes = EnsureStackAlignment(Subtarget.getFrameLowering(), NumBytes);
6086 
6087  int SPDiff = 0;
6088 
6089  // Calculate by how many bytes the stack has to be adjusted in case of tail
6090  // call optimization.
6091  if (!IsSibCall)
6092  SPDiff = CalculateTailCallSPDiff(DAG, CFlags.IsTailCall, NumBytes);
6093 
6094  // To protect arguments on the stack from being clobbered in a tail call,
6095  // force all the loads to happen before doing any other lowering.
6096  if (CFlags.IsTailCall)
6097  Chain = DAG.getStackArgumentTokenFactor(Chain);
6098 
6099  // Adjust the stack pointer for the new arguments...
6100  // These operations are automatically eliminated by the prolog/epilog pass
6101  if (!IsSibCall)
6102  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
6103  SDValue CallSeqStart = Chain;
6104 
6105  // Load the return address and frame pointer so it can be move somewhere else
6106  // later.
6107  SDValue LROp, FPOp;
6108  Chain = EmitTailCallLoadFPAndRetAddr(DAG, SPDiff, Chain, LROp, FPOp, dl);
6109 
6110  // Set up a copy of the stack pointer for use loading and storing any
6111  // arguments that may not fit in the registers available for argument
6112  // passing.
6113  SDValue StackPtr = DAG.getRegister(PPC::X1, MVT::i64);
6114 
6115  // Figure out which arguments are going to go in registers, and which in
6116  // memory. Also, if this is a vararg function, floating point operations
6117  // must be stored to our stack, and loaded into integer regs as well, if
6118  // any integer regs are available for argument passing.
6119  unsigned ArgOffset = LinkageSize;
6120 
6122  SmallVector<TailCallArgumentInfo, 8> TailCallArguments;
6123 
6124  SmallVector<SDValue, 8> MemOpChains;
6125  for (unsigned i = 0; i != NumOps; ++i) {
6126  SDValue Arg = OutVals[i];
6127  ISD::ArgFlagsTy Flags = Outs[i].Flags;
6128  EVT ArgVT = Outs[i].VT;
6129  EVT OrigVT = Outs[i].ArgVT;
6130 
6131  // PtrOff will be used to store the current argument to the stack if a
6132  // register cannot be found for it.
6133  SDValue PtrOff;
6134 
6135  // We re-align the argument offset for each argument, except when using the
6136  // fast calling convention, when we need to make sure we do that only when
6137  // we'll actually use a stack slot.
6138  auto ComputePtrOff = [&]() {
6139  /* Respect alignment of argument on the stack. */
6140  auto Alignment =
6141  CalculateStackSlotAlignment(ArgVT, OrigVT, Flags, PtrByteSize);
6142  ArgOffset = alignTo(ArgOffset, Alignment);
6143 
6144  PtrOff = DAG.getConstant(ArgOffset, dl, StackPtr.getValueType());
6145 
6146  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6147  };
6148 
6149  if (!IsFastCall) {
6150  ComputePtrOff();
6151 
6152  /* Compute GPR index associated with argument offset. */
6153  GPR_idx = (ArgOffset - LinkageSize) / PtrByteSize;
6154  GPR_idx = std::min(GPR_idx, NumGPRs);
6155  }
6156 
6157  // Promote integers to 64-bit values.
6158  if (Arg.getValueType() == MVT::i32 || Arg.getValueType() == MVT::i1) {
6159  // FIXME: Should this use ANY_EXTEND if neither sext nor zext?
6160  unsigned ExtOp = Flags.isSExt() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
6161  Arg = DAG.getNode(ExtOp, dl, MVT::i64, Arg);
6162  }
6163 
6164  // FIXME memcpy is used way more than necessary. Correctness first.
6165  // Note: "by value" is code for passing a structure by value, not
6166  // basic types.
6167  if (Flags.isByVal()) {
6168  // Note: Size includes alignment padding, so
6169  // struct x { short a; char b; }
6170  // will have Size = 4. With #pragma pack(1), it will have Size = 3.
6171  // These are the proper values we need for right-justifying the
6172  // aggregate in a parameter register.
6173  unsigned Size = Flags.getByValSize();
6174 
6175  // An empty aggregate parameter takes up no storage and no
6176  // registers.
6177  if (Size == 0)
6178  continue;
6179 
6180  if (IsFastCall)
6181  ComputePtrOff();
6182 
6183  // All aggregates smaller than 8 bytes must be passed right-justified.
6184  if (Size==1 || Size==2 || Size==4) {
6185  EVT VT = (Size==1) ? MVT::i8 : ((Size==2) ? MVT::i16 : MVT::i32);
6186  if (GPR_idx != NumGPRs) {
6187  SDValue Load = DAG.getExtLoad(ISD::EXTLOAD, dl, PtrVT, Chain, Arg,
6188  MachinePointerInfo(), VT);
6189  MemOpChains.push_back(Load.getValue(1));
6190  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6191 
6192  ArgOffset += PtrByteSize;
6193  continue;
6194  }
6195  }
6196 
  // No GPR left for a small aggregate: memcpy it (right-justified on BE)
  // straight into its parameter-save-area slot.
6197  if (GPR_idx == NumGPRs && Size < 8) {
6198  SDValue AddPtr = PtrOff;
6199  if (!isLittleEndian) {
6200  SDValue Const = DAG.getConstant(PtrByteSize - Size, dl,
6201  PtrOff.getValueType());
6202  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6203  }
6204  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6205  CallSeqStart,
6206  Flags, DAG, dl);
6207  ArgOffset += PtrByteSize;
6208  continue;
6209  }
6210  // Copy the object to parameter save area if it can not be entirely passed
6211  // by registers.
6212  // FIXME: we only need to copy the parts which need to be passed in
6213  // parameter save area. For the parts passed by registers, we don't need
6214  // to copy them to the stack although we need to allocate space for them
6215  // in parameter save area.
6216  if ((NumGPRs - GPR_idx) * PtrByteSize < Size)
6217  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, PtrOff,
6218  CallSeqStart,
6219  Flags, DAG, dl);
6220 
6221  // When a register is available, pass a small aggregate right-justified.
6222  if (Size < 8 && GPR_idx != NumGPRs) {
6223  // The easiest way to get this right-justified in a register
6224  // is to copy the structure into the rightmost portion of a
6225  // local variable slot, then load the whole slot into the
6226  // register.
6227  // FIXME: The memcpy seems to produce pretty awful code for
6228  // small aggregates, particularly for packed ones.
6229  // FIXME: It would be preferable to use the slot in the
6230  // parameter save area instead of a new local variable.
6231  SDValue AddPtr = PtrOff;
6232  if (!isLittleEndian) {
6233  SDValue Const = DAG.getConstant(8 - Size, dl, PtrOff.getValueType());
6234  AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, Const);
6235  }
6236  Chain = CallSeqStart = createMemcpyOutsideCallSeq(Arg, AddPtr,
6237  CallSeqStart,
6238  Flags, DAG, dl);
6239 
6240  // Load the slot into the register.
6241  SDValue Load =
6242  DAG.getLoad(PtrVT, dl, Chain, PtrOff, MachinePointerInfo());
6243  MemOpChains.push_back(Load.getValue(1));
6244  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6245 
6246  // Done with this argument.
6247  ArgOffset += PtrByteSize;
6248  continue;
6249  }
6250 
6251  // For aggregates larger than PtrByteSize, copy the pieces of the
6252  // object that fit into registers from the parameter save area.
6253  for (unsigned j=0; j<Size; j+=PtrByteSize) {
6254  SDValue Const = DAG.getConstant(j, dl, PtrOff.getValueType());
6255  SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const);
6256  if (GPR_idx != NumGPRs) {
6257  SDValue Load =
6258  DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo());
6259  MemOpChains.push_back(Load.getValue(1));
6260  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6261  ArgOffset += PtrByteSize;
6262  } else {
6263  ArgOffset += ((Size - j + PtrByteSize-1)/PtrByteSize)*PtrByteSize;
6264  break;
6265  }
6266  }
6267  continue;
6268  }
6269 
  // Non-byval argument: dispatch on its (possibly promoted) value type to
  // place it in GPRs, FPRs, VRs, or the parameter save area.
6270  switch (Arg.getSimpleValueType().SimpleTy) {
6271  default: llvm_unreachable("Unexpected ValueType for argument!");
6272  case MVT::i1:
6273  case MVT::i32:
6274  case MVT::i64:
6275  if (Flags.isNest()) {
6276  // The 'nest' parameter, if any, is passed in R11.
6277  RegsToPass.push_back(std::make_pair(PPC::X11, Arg));
6278  break;
6279  }
6280 
6281  // These can be scalar arguments or elements of an integer array type
6282  // passed directly. Clang may use those instead of "byval" aggregate
6283  // types to avoid forcing arguments to memory unnecessarily.
6284  if (GPR_idx != NumGPRs) {
6285  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Arg));
6286  } else {
6287  if (IsFastCall)
6288  ComputePtrOff();
6289 
6290  assert(HasParameterArea &&
6291  "Parameter area must exist to pass an argument in memory.");
6292  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6293  true, CFlags.IsTailCall, false, MemOpChains,
6294  TailCallArguments, dl);
6295  if (IsFastCall)
6296  ArgOffset += PtrByteSize;
6297  }
6298  if (!IsFastCall)
6299  ArgOffset += PtrByteSize;
6300  break;
6301  case MVT::f32:
6302  case MVT::f64: {
6303  // These can be scalar arguments or elements of a float array type
6304  // passed directly. The latter are used to implement ELFv2 homogenous
6305  // float aggregates.
6306 
6307  // Named arguments go into FPRs first, and once they overflow, the
6308  // remaining arguments go into GPRs and then the parameter save area.
6309  // Unnamed arguments for vararg functions always go to GPRs and
6310  // then the parameter save area. For now, put all arguments to vararg
6311  // routines always in both locations (FPR *and* GPR or stack slot).
6312  bool NeedGPROrStack = CFlags.IsVarArg || FPR_idx == NumFPRs;
6313  bool NeededLoad = false;
6314 
6315  // First load the argument into the next available FPR.
6316  if (FPR_idx != NumFPRs)
6317  RegsToPass.push_back(std::make_pair(FPR[FPR_idx++], Arg));
6318 
6319  // Next, load the argument into GPR or stack slot if needed.
6320  if (!NeedGPROrStack)
6321  ;
6322  else if (GPR_idx != NumGPRs && !IsFastCall) {
6323  // FIXME: We may want to re-enable this for CallingConv::Fast on the P8
6324  // once we support fp <-> gpr moves.
6325 
6326  // In the non-vararg case, this can only ever happen in the
6327  // presence of f32 array types, since otherwise we never run
6328  // out of FPRs before running out of GPRs.
6329  SDValue ArgVal;
6330 
6331  // Double values are always passed in a single GPR.
6332  if (Arg.getValueType() != MVT::f32) {
6333  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i64, Arg);
6334 
6335  // Non-array float values are extended and passed in a GPR.
6336  } else if (!Flags.isInConsecutiveRegs()) {
6337  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6338  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6339 
6340  // If we have an array of floats, we collect every odd element
6341  // together with its predecessor into one GPR.
6342  } else if (ArgOffset % PtrByteSize != 0) {
6343  SDValue Lo, Hi;
6344  Lo = DAG.getNode(ISD::BITCAST, dl, MVT::i32, OutVals[i - 1]);
6345  Hi = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6346  if (!isLittleEndian)
6347  std::swap(Lo, Hi);
6348  ArgVal = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
6349 
6350  // The final element, if even, goes into the first half of a GPR.
6351  } else if (Flags.isInConsecutiveRegsLast()) {
6352  ArgVal = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Arg);
6353  ArgVal = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i64, ArgVal);
6354  if (!isLittleEndian)
6355  ArgVal = DAG.getNode(ISD::SHL, dl, MVT::i64, ArgVal,
6356  DAG.getConstant(32, dl, MVT::i32));
6357 
6358  // Non-final even elements are skipped; they will be handled
6359  // together the with subsequent argument on the next go-around.
6360  } else
6361  ArgVal = SDValue();
6362 
6363  if (ArgVal.getNode())
6364  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], ArgVal));
6365  } else {
6366  if (IsFastCall)
6367  ComputePtrOff();
6368 
6369  // Single-precision floating-point values are mapped to the
6370  // second (rightmost) word of the stack doubleword.
6371  if (Arg.getValueType() == MVT::f32 &&
6372  !isLittleEndian && !Flags.isInConsecutiveRegs()) {
6373  SDValue ConstFour = DAG.getConstant(4, dl, PtrOff.getValueType());
6374  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff, ConstFour);
6375  }
6376 
6377  assert(HasParameterArea &&
6378  "Parameter area must exist to pass an argument in memory.");
6379  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6380  true, CFlags.IsTailCall, false, MemOpChains,
6381  TailCallArguments, dl);
6382 
6383  NeededLoad = true;
6384  }
6385  // When passing an array of floats, the array occupies consecutive
6386  // space in the argument area; only round up to the next doubleword
6387  // at the end of the array. Otherwise, each float takes 8 bytes.
6388  if (!IsFastCall || NeededLoad) {
6389  ArgOffset += (Arg.getValueType() == MVT::f32 &&
6390  Flags.isInConsecutiveRegs()) ? 4 : 8;
6391  if (Flags.isInConsecutiveRegsLast())
6392  ArgOffset = ((ArgOffset + PtrByteSize - 1)/PtrByteSize) * PtrByteSize;
6393  }
6394  break;
6395  }
6396  case MVT::v4f32:
6397  case MVT::v4i32:
6398  case MVT::v8i16:
6399  case MVT::v16i8:
6400  case MVT::v2f64:
6401  case MVT::v2i64:
6402  case MVT::v1i128:
6403  case MVT::f128:
6404  // These can be scalar arguments or elements of a vector array type
6405  // passed directly. The latter are used to implement ELFv2 homogenous
6406  // vector aggregates.
6407 
6408  // For a varargs call, named arguments go into VRs or on the stack as
6409  // usual; unnamed arguments always go to the stack or the corresponding
6410  // GPRs when within range. For now, we always put the value in both
6411  // locations (or even all three).
6412  if (CFlags.IsVarArg) {
6413  assert(HasParameterArea &&
6414  "Parameter area must exist if we have a varargs call.");
6415  // We could elide this store in the case where the object fits
6416  // entirely in R registers. Maybe later.
6417  SDValue Store =
6418  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
6419  MemOpChains.push_back(Store);
6420  if (VR_idx != NumVRs) {
6421  SDValue Load =
6422  DAG.getLoad(MVT::v4f32, dl, Store, PtrOff, MachinePointerInfo());
6423  MemOpChains.push_back(Load.getValue(1));
6424  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Load));
6425  }
6426  ArgOffset += 16;
  // Also reload the vector's bytes into as many GPRs as remain, since
  // a varargs callee may read the value from GPRs.
6427  for (unsigned i=0; i<16; i+=PtrByteSize) {
6428  if (GPR_idx == NumGPRs)
6429  break;
6430  SDValue Ix = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
6431  DAG.getConstant(i, dl, PtrVT));
6432  SDValue Load =
6433  DAG.getLoad(PtrVT, dl, Store, Ix, MachinePointerInfo());
6434  MemOpChains.push_back(Load.getValue(1));
6435  RegsToPass.push_back(std::make_pair(GPR[GPR_idx++], Load));
6436  }
6437  break;
6438  }
6439 
6440  // Non-varargs Altivec params go into VRs or on the stack.
6441  if (VR_idx != NumVRs) {
6442  RegsToPass.push_back(std::make_pair(VR[VR_idx++], Arg));
6443  } else {
6444  if (IsFastCall)
6445  ComputePtrOff();
6446 
6447  assert(HasParameterArea &&
6448  "Parameter area must exist to pass an argument in memory.");
6449  LowerMemOpCallTo(DAG, MF, Chain, Arg, PtrOff, SPDiff, ArgOffset,
6450  true, CFlags.IsTailCall, true, MemOpChains,
6451  TailCallArguments, dl);
6452  if (IsFastCall)
6453  ArgOffset += 16;
6454  }
6455 
6456  if (!IsFastCall)
6457  ArgOffset += 16;
6458  break;
6459  }
6460  }
6461 
6462  assert((!HasParameterArea || NumBytesActuallyUsed == ArgOffset) &&
6463  "mismatch in size of parameter area");
6464  (void)NumBytesActuallyUsed;
6465 
6466  if (!MemOpChains.empty())
6467  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
6468 
6469  // Check if this is an indirect call (MTCTR/BCTRL).
6470  // See prepareDescriptorIndirectCall and buildCallOperands for more
6471  // information about calls through function pointers in the 64-bit SVR4 ABI.
6472  if (CFlags.IsIndirect) {
6473  // For 64-bit ELFv2 ABI with PCRel, do not save the TOC of the
6474  // caller in the TOC save area.
6475  if (isTOCSaveRestoreRequired(Subtarget)) {
6476  assert(!CFlags.IsTailCall && "Indirect tails calls not supported");
6477  // Load r2 into a virtual register and store it to the TOC save area.
6478  setUsesTOCBasePtr(DAG);
6479  SDValue Val = DAG.getCopyFromReg(Chain, dl, PPC::X2, MVT::i64);
6480  // TOC save area offset.
6481  unsigned TOCSaveOffset = Subtarget.getFrameLowering()->getTOCSaveOffset();
6482  SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
6483  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
6484  Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
6486  DAG.getMachineFunction(), TOCSaveOffset));
6487  }
6488  // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
6489  // This does not mean the MTCTR instruction must use R12; it's easier
6490  // to model this as an extra parameter, so do that.
6491  if (isELFv2ABI && !CFlags.IsPatchPoint)
6492  RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
6493  }
6494 
6495  // Build a sequence of copy-to-reg nodes chained together with token chain
6496  // and flag operands which copy the outgoing args into the appropriate regs.
6497  SDValue InFlag;
6498  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
6499  Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
6500  RegsToPass[i].second, InFlag);
6501  InFlag = Chain.getValue(1);
6502  }
6503 
6504  if (CFlags.IsTailCall && !IsSibCall)
6505  PrepareTailCall(DAG, InFlag, Chain, dl, SPDiff, NumBytes, LROp, FPOp,
6506  TailCallArguments);
6507 
6508  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
6509  Callee, SPDiff, NumBytes, Ins, InVals, CB);
6510 }
6511 
6512 // Returns true when the shadow of a general purpose argument register
6513 // in the parameter save area is aligned to at least 'RequiredAlign'.
6514 static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign) {
6515  assert(RequiredAlign.value() <= 16 &&
6516  "Required alignment greater than stack alignment.");
6517  switch (Reg) {
6518  default:
6519  report_fatal_error("called on invalid register.");
6520  case PPC::R5:
6521  case PPC::R9:
6522  case PPC::X3:
6523  case PPC::X5:
6524  case PPC::X7:
6525  case PPC::X9:
6526  // These registers are 16 byte aligned which is the most strict aligment
6527  // we can support.
6528  return true;
6529  case PPC::R3:
6530  case PPC::R7:
6531  case PPC::X4:
6532  case PPC::X6:
6533  case PPC::X8:
6534  case PPC::X10:
6535  // The shadow of these registers in the PSA is 8 byte aligned.
6536  return RequiredAlign <= 8;
6537  case PPC::R4:
6538  case PPC::R6:
6539  case PPC::R8:
6540  case PPC::R10:
6541  return RequiredAlign <= 4;
6542  }
6543 }
6544 
/// Custom calling-convention handler for the AIX ABI (32- and 64-bit).
/// Assigns the argument described by \p ValNo / \p ValVT / \p ArgFlags to
/// register(s) and/or a parameter save area (PSA) slot, recording the
/// resulting CCValAssign location(s) in \p State. Returns false when the
/// argument was assigned, true when the value type is unhandled.
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT,
                   CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
                   CCState &S) {
  AIXCCState &State = static_cast<AIXCCState &>(S);
  const PPCSubtarget &Subtarget = static_cast<const PPCSubtarget &>(
      State.getMachineFunction().getSubtarget());
  const bool IsPPC64 = Subtarget.isPPC64();
  // GPR/pointer width: 8 bytes on PPC64, 4 bytes on PPC32.
  const Align PtrAlign = IsPPC64 ? Align(8) : Align(4);
  const MVT RegVT = IsPPC64 ? MVT::i64 : MVT::i32;

  if (ValVT == MVT::f128)
    report_fatal_error("f128 is unimplemented on AIX.");

  if (ArgFlags.isNest())
    report_fatal_error("Nest arguments are unimplemented.");

  static const MCPhysReg GPR_32[] = {// 32-bit registers.
                                     PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                     PPC::R7, PPC::R8, PPC::R9, PPC::R10};
  static const MCPhysReg GPR_64[] = {// 64-bit registers.
                                     PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                     PPC::X7, PPC::X8, PPC::X9, PPC::X10};

  static const MCPhysReg VR[] = {// Vector registers.
                                 PPC::V2, PPC::V3, PPC::V4, PPC::V5,
                                 PPC::V6, PPC::V7, PPC::V8, PPC::V9,
                                 PPC::V10, PPC::V11, PPC::V12, PPC::V13};

  if (ArgFlags.isByVal()) {
    // By-value aggregates occupy consecutive GPRs, spilling into the PSA
    // once the argument registers are exhausted.
    if (ArgFlags.getNonZeroByValAlign() > PtrAlign)
      report_fatal_error("Pass-by-value arguments with alignment greater than "
                         "register width are not supported.");

    const unsigned ByValSize = ArgFlags.getByValSize();

    // An empty aggregate parameter takes up no storage and no registers,
    // but needs a MemLoc for a stack slot for the formal arguments side.
    if (ByValSize == 0) {
      // NOTE(review): the opening of the State.addLoc(CCValAssign::getMem(...
      // call appears to be missing from this extract — confirm against
      // upstream.
                                       State.getNextStackOffset(), RegVT,
                                       LocInfo));
      return false;
    }

    const unsigned StackSize = alignTo(ByValSize, PtrAlign);
    unsigned Offset = State.AllocateStack(StackSize, PtrAlign);
    // Walk the aggregate one pointer-width at a time, assigning GPRs while
    // they last; the first word that does not fit in a register gets a
    // MemLoc covering the remainder (the stack space was reserved above).
    for (const unsigned E = Offset + StackSize; Offset < E;
         Offset += PtrAlign.value()) {
      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
      else {
        // NOTE(review): the opening of the State.addLoc(...) call for the
        // spilled portion appears to be missing from this extract — confirm
        // against upstream.
                                         LocInfo));
        break;
      }
    }
    return false;
  }

  // Arguments always reserve parameter save area.
  switch (ValVT.SimpleTy) {
  default:
    report_fatal_error("Unhandled value type for argument.");
  case MVT::i64:
    // i64 arguments should have been split to i32 for PPC32.
    assert(IsPPC64 && "PPC32 should have split i64 values.");
    // NOTE(review): a fallthrough annotation appears to be missing from this
    // extract — i64 is intended to fall through to the integer handling
    // below; confirm against upstream.
  case MVT::i1:
  case MVT::i32: {
    const unsigned Offset = State.AllocateStack(PtrAlign.value(), PtrAlign);
    // AIX integer arguments are always passed in register width.
    if (ValVT.getFixedSizeInBits() < RegVT.getFixedSizeInBits())
      LocInfo = ArgFlags.isSExt() ? CCValAssign::LocInfo::SExt
                                  : CCValAssign::LocInfo::ZExt;
    if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32))
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, RegVT, LocInfo));
    else
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, RegVT, LocInfo));

    return false;
  }
  case MVT::f32:
  case MVT::f64: {
    // Parameter save area (PSA) is reserved even if the float passes in fpr.
    const unsigned StoreSize = LocVT.getStoreSize();
    // Floats are always 4-byte aligned in the PSA on AIX.
    // This includes f64 in 64-bit mode for ABI compatibility.
    const unsigned Offset =
        State.AllocateStack(IsPPC64 ? 8 : StoreSize, Align(4));
    unsigned FReg = State.AllocateReg(FPR);
    if (FReg)
      State.addLoc(CCValAssign::getReg(ValNo, ValVT, FReg, LocVT, LocInfo));

    // Reserve and initialize GPRs or initialize the PSA as required.
    for (unsigned I = 0; I < StoreSize; I += PtrAlign.value()) {
      if (unsigned Reg = State.AllocateReg(IsPPC64 ? GPR_64 : GPR_32)) {
        assert(FReg && "An FPR should be available when a GPR is reserved.");
        if (State.isVarArg()) {
          // Successfully reserved GPRs are only initialized for vararg calls.
          // Custom handling is required for:
          // f64 in PPC32 needs to be split into 2 GPRs.
          // f32 in PPC64 needs to occupy only lower 32 bits of 64-bit GPR.
          State.addLoc(
              CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
        }
      } else {
        // If there are insufficient GPRs, the PSA needs to be initialized.
        // Initialization occurs even if an FPR was initialized for
        // compatibility with the AIX XL compiler. The full memory for the
        // argument will be initialized even if a prior word is saved in GPR.
        // A custom memLoc is used when the argument also passes in FPR so
        // that the callee handling can skip over it easily.
        State.addLoc(
            FReg ? CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT,
                                             LocInfo)
                 : CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
        break;
      }
    }

    return false;
  }
  case MVT::v4f32:
  case MVT::v4i32:
  case MVT::v8i16:
  case MVT::v16i8:
  case MVT::v2i64:
  case MVT::v2f64:
  case MVT::v1i128: {
    // All supported vector types are 16 bytes, 16-byte aligned.
    const unsigned VecSize = 16;
    const Align VecAlign(VecSize);

    if (!State.isVarArg()) {
      // If there are vector registers remaining we don't consume any stack
      // space.
      if (unsigned VReg = State.AllocateReg(VR)) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
        return false;
      }
      // Vectors passed on the stack do not shadow GPRs or FPRs even though they
      // might be allocated in the portion of the PSA that is shadowed by the
      // GPRs.
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      return false;
    }

    // Vararg handling below: vectors may travel (partially) in GPRs.
    const unsigned PtrSize = IsPPC64 ? 8 : 4;
    ArrayRef<MCPhysReg> GPRs = IsPPC64 ? GPR_64 : GPR_32;

    unsigned NextRegIndex = State.getFirstUnallocated(GPRs);
    // Burn any underaligned registers and their shadowed stack space until
    // we reach the required alignment.
    while (NextRegIndex != GPRs.size() &&
           !isGPRShadowAligned(GPRs[NextRegIndex], VecAlign)) {
      // Shadow allocate register and its stack shadow.
      unsigned Reg = State.AllocateReg(GPRs);
      State.AllocateStack(PtrSize, PtrAlign);
      assert(Reg && "Allocating register unexpectedly failed.");
      (void)Reg;
      NextRegIndex = State.getFirstUnallocated(GPRs);
    }

    // Vectors that are passed as fixed arguments are handled differently.
    // They are passed in VRs if any are available (unlike arguments passed
    // through ellipses) and shadow GPRs (unlike arguments to non-vaarg
    // functions)
    if (State.isFixed(ValNo)) {
      if (unsigned VReg = State.AllocateReg(VR)) {
        State.addLoc(CCValAssign::getReg(ValNo, ValVT, VReg, LocVT, LocInfo));
        // Shadow allocate GPRs and stack space even though we pass in a VR.
        for (unsigned I = 0; I != VecSize; I += PtrSize)
          State.AllocateReg(GPRs);
        State.AllocateStack(VecSize, VecAlign);
        return false;
      }
      // No vector registers remain so pass on the stack.
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      return false;
    }

    // If all GPRS are consumed then we pass the argument fully on the stack.
    if (NextRegIndex == GPRs.size()) {
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
      return false;
    }

    // Corner case for 32-bit codegen. We have 2 registers to pass the first
    // half of the argument, and then need to pass the remaining half on the
    // stack.
    if (GPRs[NextRegIndex] == PPC::R9) {
      const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
      State.addLoc(
          CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));

      const unsigned FirstReg = State.AllocateReg(PPC::R9);
      const unsigned SecondReg = State.AllocateReg(PPC::R10);
      assert(FirstReg && SecondReg &&
             "Allocating R9 or R10 unexpectedly failed.");
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, FirstReg, RegVT, LocInfo));
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, SecondReg, RegVT, LocInfo));
      return false;
    }

    // We have enough GPRs to fully pass the vector argument, and we have
    // already consumed any underaligned registers. Start with the custom
    // MemLoc and then the custom RegLocs.
    const unsigned Offset = State.AllocateStack(VecSize, VecAlign);
    State.addLoc(
        CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
    for (unsigned I = 0; I != VecSize; I += PtrSize) {
      const unsigned Reg = State.AllocateReg(GPRs);
      assert(Reg && "Failed to allocated register for vararg vector argument");
      State.addLoc(
          CCValAssign::getCustomReg(ValNo, ValVT, Reg, RegVT, LocInfo));
    }
    return false;
  }
  }
  return true;
}
6771 
6772 // So far, this function is only used by LowerFormalArguments_AIX()
6774  bool IsPPC64,
6775  bool HasP8Vector,
6776  bool HasVSX) {
6777  assert((IsPPC64 || SVT != MVT::i64) &&
6778  "i64 should have been split for 32-bit codegen.");
6779 
6780  switch (SVT) {
6781  default:
6782  report_fatal_error("Unexpected value type for formal argument");
6783  case MVT::i1:
6784  case MVT::i32:
6785  case MVT::i64:
6786  return IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
6787  case MVT::f32:
6788  return HasP8Vector ? &PPC::VSSRCRegClass : &PPC::F4RCRegClass;
6789  case MVT::f64:
6790  return HasVSX ? &PPC::VSFRCRegClass : &PPC::F8RCRegClass;
6791  case MVT::v4f32:
6792  case MVT::v4i32:
6793  case MVT::v8i16:
6794  case MVT::v16i8:
6795  case MVT::v2i64:
6796  case MVT::v2f64:
6797  case MVT::v1i128:
6798  return &PPC::VRRCRegClass;
6799  }
6800 }
6801 
6803  SelectionDAG &DAG, SDValue ArgValue,
6804  MVT LocVT, const SDLoc &dl) {
6805  assert(ValVT.isScalarInteger() && LocVT.isScalarInteger());
6806  assert(ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits());
6807 
6808  if (Flags.isSExt())
6809  ArgValue = DAG.getNode(ISD::AssertSext, dl, LocVT, ArgValue,
6810  DAG.getValueType(ValVT));
6811  else if (Flags.isZExt())
6812  ArgValue = DAG.getNode(ISD::AssertZext, dl, LocVT, ArgValue,
6813  DAG.getValueType(ValVT));
6814 
6815  return DAG.getNode(ISD::TRUNCATE, dl, ValVT, ArgValue);
6816 }
6817 
6818 static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL) {
6819  const unsigned LASize = FL->getLinkageSize();
6820 
6821  if (PPC::GPRCRegClass.contains(Reg)) {
6822  assert(Reg >= PPC::R3 && Reg <= PPC::R10 &&
6823  "Reg must be a valid argument register!");
6824  return LASize + 4 * (Reg - PPC::R3);
6825  }
6826 
6827  if (PPC::G8RCRegClass.contains(Reg)) {
6828  assert(Reg >= PPC::X3 && Reg <= PPC::X10 &&
6829  "Reg must be a valid argument register!");
6830  return LASize + 8 * (Reg - PPC::X3);
6831  }
6832 
6833  llvm_unreachable("Only general purpose registers expected.");
6834 }
6835 
6836 // AIX ABI Stack Frame Layout:
6837 //
6838 // Low Memory +--------------------------------------------+
6839 // SP +---> | Back chain | ---+
6840 // | +--------------------------------------------+ |
6841 // | | Saved Condition Register | |
6842 // | +--------------------------------------------+ |
6843 // | | Saved Linkage Register | |
6844 // | +--------------------------------------------+ | Linkage Area
6845 // | | Reserved for compilers | |
6846 // | +--------------------------------------------+ |
6847 // | | Reserved for binders | |
6848 // | +--------------------------------------------+ |
6849 // | | Saved TOC pointer | ---+
6850 // | +--------------------------------------------+
6851 // | | Parameter save area |
6852 // | +--------------------------------------------+
6853 // | | Alloca space |
6854 // | +--------------------------------------------+
6855 // | | Local variable space |
6856 // | +--------------------------------------------+
6857 // | | Float/int conversion temporary |
6858 // | +--------------------------------------------+
6859 // | | Save area for AltiVec registers |
6860 // | +--------------------------------------------+
6861 // | | AltiVec alignment padding |
6862 // | +--------------------------------------------+
6863 // | | Save area for VRSAVE register |
6864 // | +--------------------------------------------+
6865 // | | Save area for General Purpose registers |
6866 // | +--------------------------------------------+
6867 // | | Save area for Floating Point registers |
6868 // | +--------------------------------------------+
6869 // +---- | Back chain |
6870 // High Memory +--------------------------------------------+
6871 //
6872 // Specifications:
6873 // AIX 7.2 Assembler Language Reference
6874 // Subroutine linkage convention
6875 
/// Lower incoming (formal) arguments per the AIX ABI (see the stack frame
/// layout diagram above): run CC_AIX over the argument list, then for each
/// resulting location create live-in virtual registers and/or fixed stack
/// objects and produce the SDValues the function body will use.
SDValue PPCTargetLowering::LowerFormalArguments_AIX(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  assert((CallConv == CallingConv::C || CallConv == CallingConv::Cold ||
          CallConv == CallingConv::Fast) &&
         "Unexpected calling convention!");

  if (getTargetMachine().Options.GuaranteedTailCallOpt)
    report_fatal_error("Tail call support is unimplemented on AIX.");

  if (useSoftFloat())
    report_fatal_error("Soft float support is unimplemented on AIX.");

  const PPCSubtarget &Subtarget =
      static_cast<const PPCSubtarget &>(DAG.getSubtarget());

  const bool IsPPC64 = Subtarget.isPPC64();
  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;

  // Assign locations to all of the incoming arguments.
  // NOTE(review): the declaration of ArgLocs (a SmallVector<CCValAssign>)
  // appears to be missing from this extract — confirm against upstream.
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  AIXCCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());

  const EVT PtrVT = getPointerTy(MF.getDataLayout());
  // Reserve space for the linkage area on the stack.
  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
  CCInfo.AnalyzeFormalArguments(Ins, CC_AIX);

  // Stores that spill incoming registers to the stack (byvals, varargs);
  // chained together with a TokenFactor at the end.
  SmallVector<SDValue, 8> MemOps;

  for (size_t I = 0, End = ArgLocs.size(); I != End; /* No increment here */) {
    CCValAssign &VA = ArgLocs[I++];
    MVT LocVT = VA.getLocVT();
    MVT ValVT = VA.getValVT();
    ISD::ArgFlagsTy Flags = Ins[VA.getValNo()].Flags;
    // For compatibility with the AIX XL compiler, the float args in the
    // parameter save area are initialized even if the argument is available
    // in register. The caller is required to initialize both the register
    // and memory, however, the callee can choose to expect it in either.
    // The memloc is dismissed here because the argument is retrieved from
    // the register.
    if (VA.isMemLoc() && VA.needsCustom() && ValVT.isFloatingPoint())
      continue;

    // Load an argument from its (right-justified) stack slot and append it
    // to InVals.
    auto HandleMemLoc = [&]() {
      const unsigned LocSize = LocVT.getStoreSize();
      const unsigned ValSize = ValVT.getStoreSize();
      assert((ValSize <= LocSize) &&
             "Object size is larger than size of MemLoc");
      int CurArgOffset = VA.getLocMemOffset();
      // Objects are right-justified because AIX is big-endian.
      if (LocSize > ValSize)
        CurArgOffset += LocSize - ValSize;
      // Potential tail calls could cause overwriting of argument stack slots.
      // NOTE(review): part of this initializer appears to be missing from
      // this extract — confirm against upstream.
      const bool IsImmutable =
          (CallConv == CallingConv::Fast));
      int FI = MFI.CreateFixedObject(ValSize, CurArgOffset, IsImmutable);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      SDValue ArgValue =
          DAG.getLoad(ValVT, dl, Chain, FIN, MachinePointerInfo());
      InVals.push_back(ArgValue);
    };

    // Vector arguments to VaArg functions are passed both on the stack, and
    // in any available GPRs. Load the value from the stack and add the GPRs
    // as live ins.
    if (VA.isMemLoc() && VA.needsCustom()) {
      assert(ValVT.isVector() && "Unexpected Custom MemLoc type.");
      assert(isVarArg && "Only use custom memloc for vararg.");
      // ValNo of the custom MemLoc, so we can compare it to the ValNo of the
      // matching custom RegLocs.
      const unsigned OriginalValNo = VA.getValNo();
      (void)OriginalValNo;

      // Consume one custom RegLoc paired with the MemLoc and register the
      // physical register as a live-in.
      auto HandleCustomVecRegLoc = [&]() {
        assert(I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
               "Missing custom RegLoc.");
        VA = ArgLocs[I++];
        assert(VA.getValVT().isVector() &&
               "Unexpected Val type for custom RegLoc.");
        assert(VA.getValNo() == OriginalValNo &&
               "ValNo mismatch between custom MemLoc and RegLoc.");
        // NOTE(review): the declaration of SVT appears to be missing from
        // this extract — confirm against upstream.
        MF.addLiveIn(VA.getLocReg(),
                     getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
                                       Subtarget.hasVSX()));
      };

      HandleMemLoc();
      // In 64-bit there will be exactly 2 custom RegLocs that follow, and
      // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
      // R10.
      HandleCustomVecRegLoc();
      HandleCustomVecRegLoc();

      // If we are targeting 32-bit, there might be 2 extra custom RegLocs if
      // we passed the vector in R5, R6, R7 and R8.
      if (I != End && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom()) {
        assert(!IsPPC64 &&
               "Only 2 custom RegLocs expected for 64-bit codegen.");
        HandleCustomVecRegLoc();
        HandleCustomVecRegLoc();
      }

      continue;
    }

    if (VA.isRegLoc()) {
      // NOTE(review): the statement bodies of these branches/cases (which
      // presumably record parameter-type information for each register
      // argument) appear to be missing from this extract — confirm against
      // upstream.
      if (VA.getValVT().isScalarInteger())
      else if (VA.getValVT().isFloatingPoint() && !VA.getValVT().isVector()) {
        switch (VA.getValVT().SimpleTy) {
        default:
          report_fatal_error("Unhandled value type for argument.");
        case MVT::f32:
          break;
        case MVT::f64:
          break;
        }
      } else if (VA.getValVT().isVector()) {
        switch (VA.getValVT().SimpleTy) {
        default:
          report_fatal_error("Unhandled value type for argument.");
        case MVT::v16i8:
          break;
        case MVT::v8i16:
          break;
        case MVT::v4i32:
        case MVT::v2i64:
        case MVT::v1i128:
          break;
        case MVT::v4f32:
        case MVT::v2f64:
          break;
        }
      }
    }

    // ByVal that lives entirely in memory: hand back the address of its
    // fixed stack object.
    if (Flags.isByVal() && VA.isMemLoc()) {
      const unsigned Size =
          alignTo(Flags.getByValSize() ? Flags.getByValSize() : PtrByteSize,
                  PtrByteSize);
      const int FI = MF.getFrameInfo().CreateFixedObject(
          Size, VA.getLocMemOffset(), /* IsImmutable */ false,
          /* IsAliased */ true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);

      continue;
    }

    // ByVal passed (at least partially) in registers: spill the registers
    // into the argument's shadow slots in the parameter save area.
    if (Flags.isByVal()) {
      assert(VA.isRegLoc() && "MemLocs should already be handled.");

      const MCPhysReg ArgReg = VA.getLocReg();
      const PPCFrameLowering *FL = Subtarget.getFrameLowering();

      if (Flags.getNonZeroByValAlign() > PtrByteSize)
        report_fatal_error("Over aligned byvals not supported yet.");

      const unsigned StackSize = alignTo(Flags.getByValSize(), PtrByteSize);
      const int FI = MF.getFrameInfo().CreateFixedObject(
          StackSize, mapArgRegToOffsetAIX(ArgReg, FL), /* IsImmutable */ false,
          /* IsAliased */ true);
      SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
      InVals.push_back(FIN);

      // Add live ins for all the RegLocs for the same ByVal.
      const TargetRegisterClass *RegClass =
          IsPPC64 ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;

      auto HandleRegLoc = [&, RegClass, LocVT](const MCPhysReg PhysReg,
                                               unsigned Offset) {
        const Register VReg = MF.addLiveIn(PhysReg, RegClass);
        // Since the callers side has left justified the aggregate in the
        // register, we can simply store the entire register into the stack
        // slot.
        SDValue CopyFrom = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
        // The store to the fixedstack object is needed because accessing a
        // field of the ByVal will use a gep and load. Ideally we will optimize
        // to extracting the value from the register directly, and elide the
        // stores when the arguments address is not taken, but that will need to
        // be future work.
        // NOTE(review): the trailing MachinePointerInfo argument of this
        // getStore call appears to be missing from this extract — confirm
        // against upstream.
        SDValue Store = DAG.getStore(
            CopyFrom.getValue(1), dl, CopyFrom,
            DAG.getObjectPtrOffset(dl, FIN, TypeSize::Fixed(Offset)),

        MemOps.push_back(Store);
      };

      unsigned Offset = 0;
      HandleRegLoc(VA.getLocReg(), Offset);
      Offset += PtrByteSize;
      for (; Offset != StackSize && ArgLocs[I].isRegLoc();
           Offset += PtrByteSize) {
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
               "RegLocs should be for ByVal argument.");

        const CCValAssign RL = ArgLocs[I++];
        HandleRegLoc(RL.getLocReg(), Offset);
      }

      if (Offset != StackSize) {
        assert(ArgLocs[I].getValNo() == VA.getValNo() &&
               "Expected MemLoc for remaining bytes.");
        assert(ArgLocs[I].isMemLoc() && "Expected MemLoc for remaining bytes.");
        // Consume the MemLoc. The InVal has already been emitted, so nothing
        // more needs to be done.
        ++I;
      }

      continue;
    }

    // Plain register argument: create a live-in and, for promoted integers,
    // truncate back to the IR value type.
    if (VA.isRegLoc() && !VA.needsCustom()) {
      MVT::SimpleValueType SVT = ValVT.SimpleTy;
      Register VReg =
          MF.addLiveIn(VA.getLocReg(),
                       getRegClassForSVT(SVT, IsPPC64, Subtarget.hasP8Vector(),
                                         Subtarget.hasVSX()));
      SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, VReg, LocVT);
      if (ValVT.isScalarInteger() &&
          (ValVT.getFixedSizeInBits() < LocVT.getFixedSizeInBits())) {
        ArgValue =
            truncateScalarIntegerArg(Flags, ValVT, DAG, ArgValue, LocVT, dl);
      }
      InVals.push_back(ArgValue);
      continue;
    }
    if (VA.isMemLoc()) {
      HandleMemLoc();
      continue;
    }
  }

  // On AIX a minimum of 8 words is saved to the parameter save area.
  const unsigned MinParameterSaveArea = 8 * PtrByteSize;
  // Area that is at least reserved in the caller of this function.
  unsigned CallerReservedArea =
      std::max(CCInfo.getNextStackOffset(), LinkageSize + MinParameterSaveArea);

  // Set the size that is at least reserved in caller of this function. Tail
  // call optimized function's reserved stack space needs to be aligned so
  // that taking the difference between two stack areas will result in an
  // aligned stack.
  CallerReservedArea =
      EnsureStackAlignment(Subtarget.getFrameLowering(), CallerReservedArea);
  FuncInfo->setMinReservedArea(CallerReservedArea);

  if (isVarArg) {
    FuncInfo->setVarArgsFrameIndex(
        MFI.CreateFixedObject(PtrByteSize, CCInfo.getNextStackOffset(), true));
    SDValue FIN = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT);

    static const MCPhysReg GPR_32[] = {PPC::R3, PPC::R4, PPC::R5, PPC::R6,
                                       PPC::R7, PPC::R8, PPC::R9, PPC::R10};

    static const MCPhysReg GPR_64[] = {PPC::X3, PPC::X4, PPC::X5, PPC::X6,
                                       PPC::X7, PPC::X8, PPC::X9, PPC::X10};
    const unsigned NumGPArgRegs = array_lengthof(IsPPC64 ? GPR_64 : GPR_32);

    // The fixed integer arguments of a variadic function are stored to the
    // VarArgsFrameIndex on the stack so that they may be loaded by
    // dereferencing the result of va_next.
    for (unsigned GPRIndex =
             (CCInfo.getNextStackOffset() - LinkageSize) / PtrByteSize;
         GPRIndex < NumGPArgRegs; ++GPRIndex) {

      const Register VReg =
          IsPPC64 ? MF.addLiveIn(GPR_64[GPRIndex], &PPC::G8RCRegClass)
                  : MF.addLiveIn(GPR_32[GPRIndex], &PPC::GPRCRegClass);

      SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
      SDValue Store =
          DAG.getStore(Val.getValue(1), dl, Val, FIN, MachinePointerInfo());
      MemOps.push_back(Store);
      // Increment the address for the next argument to store.
      SDValue PtrOff = DAG.getConstant(PtrByteSize, dl, PtrVT);
      FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff);
    }
  }

  if (!MemOps.empty())
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOps);

  return Chain;
}
7178 
7179 SDValue PPCTargetLowering::LowerCall_AIX(
7180  SDValue Chain, SDValue Callee, CallFlags CFlags,
7181  const SmallVectorImpl<ISD::OutputArg> &Outs,
7182  const SmallVectorImpl<SDValue> &OutVals,
7183  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
7184  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
7185  const CallBase *CB) const {
7186  // See PPCTargetLowering::LowerFormalArguments_AIX() for a description of the
7187  // AIX ABI stack frame layout.
7188 
7189  assert((CFlags.CallConv == CallingConv::C ||
7190  CFlags.CallConv == CallingConv::Cold ||
7191  CFlags.CallConv == CallingConv::Fast) &&
7192  "Unexpected calling convention!");
7193 
7194  if (CFlags.IsPatchPoint)
7195  report_fatal_error("This call type is unimplemented on AIX.");
7196 
7197  const PPCSubtarget& Subtarget =
7198  static_cast<const PPCSubtarget&>(DAG.getSubtarget());
7199 
7200  MachineFunction &MF = DAG.getMachineFunction();
7202  AIXCCState CCInfo(CFlags.CallConv, CFlags.IsVarArg, MF, ArgLocs,
7203  *DAG.getContext());
7204 
7205  // Reserve space for the linkage save area (LSA) on the stack.
7206  // In both PPC32 and PPC64 there are 6 reserved slots in the LSA:
7207  // [SP][CR][LR][2 x reserved][TOC].
7208  // The LSA is 24 bytes (6x4) in PPC32 and 48 bytes (6x8) in PPC64.
7209  const unsigned LinkageSize = Subtarget.getFrameLowering()->getLinkageSize();
7210  const bool IsPPC64 = Subtarget.isPPC64();
7211  const EVT PtrVT = getPointerTy(DAG.getDataLayout());
7212  const unsigned PtrByteSize = IsPPC64 ? 8 : 4;
7213  CCInfo.AllocateStack(LinkageSize, Align(PtrByteSize));
7214  CCInfo.AnalyzeCallOperands(Outs, CC_AIX);
7215 
7216  // The prolog code of the callee may store up to 8 GPR argument registers to
7217  // the stack, allowing va_start to index over them in memory if the callee
7218  // is variadic.
7219  // Because we cannot tell if this is needed on the caller side, we have to
7220  // conservatively assume that it is needed. As such, make sure we have at
7221  // least enough stack space for the caller to store the 8 GPRs.
7222  const unsigned MinParameterSaveAreaSize = 8 * PtrByteSize;
7223  const unsigned NumBytes = std::max(LinkageSize + MinParameterSaveAreaSize,
7224  CCInfo.getNextStackOffset());
7225 
7226  // Adjust the stack pointer for the new arguments...
7227  // These operations are automatically eliminated by the prolog/epilog pass.
7228  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
7229  SDValue CallSeqStart = Chain;
7230 
7232  SmallVector<SDValue, 8> MemOpChains;
7233 
7234  // Set up a copy of the stack pointer for loading and storing any
7235  // arguments that may not fit in the registers available for argument
7236  // passing.
7237  const SDValue StackPtr = IsPPC64 ? DAG.getRegister(PPC::X1, MVT::i64)
7238  : DAG.getRegister(PPC::R1, MVT::i32);
7239 
7240  for (unsigned I = 0, E = ArgLocs.size(); I != E;) {
7241  const unsigned ValNo = ArgLocs[I].getValNo();
7242  SDValue Arg = OutVals[ValNo];
7243  ISD::ArgFlagsTy Flags = Outs[ValNo].Flags;
7244 
7245  if (Flags.isByVal()) {
7246  const unsigned ByValSize = Flags.getByValSize();
7247 
7248  // Nothing to do for zero-sized ByVals on the caller side.
7249  if (!ByValSize) {
7250  ++I;
7251  continue;
7252  }
7253 
7254  auto GetLoad = [&](EVT VT, unsigned LoadOffset) {
7255  return DAG.getExtLoad(
7256  ISD::ZEXTLOAD, dl, PtrVT, Chain,
7257  (LoadOffset != 0)
7258  ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7259  : Arg,
7260  MachinePointerInfo(), VT);
7261  };
7262 
7263  unsigned LoadOffset = 0;
7264 
7265  // Initialize registers, which are fully occupied by the by-val argument.
7266  while (LoadOffset + PtrByteSize <= ByValSize && ArgLocs[I].isRegLoc()) {
7267  SDValue Load = GetLoad(PtrVT, LoadOffset);
7268  MemOpChains.push_back(Load.getValue(1));
7269  LoadOffset += PtrByteSize;
7270  const CCValAssign &ByValVA = ArgLocs[I++];
7271  assert(ByValVA.getValNo() == ValNo &&
7272  "Unexpected location for pass-by-value argument.");
7273  RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), Load));
7274  }
7275 
7276  if (LoadOffset == ByValSize)
7277  continue;
7278 
7279  // There must be one more loc to handle the remainder.
7280  assert(ArgLocs[I].getValNo() == ValNo &&
7281  "Expected additional location for by-value argument.");
7282 
7283  if (ArgLocs[I].isMemLoc()) {
7284  assert(LoadOffset < ByValSize && "Unexpected memloc for by-val arg.");
7285  const CCValAssign &ByValVA = ArgLocs[I++];
7286  ISD::ArgFlagsTy MemcpyFlags = Flags;
7287  // Only memcpy the bytes that don't pass in register.
7288  MemcpyFlags.setByValSize(ByValSize - LoadOffset);
7289  Chain = CallSeqStart = createMemcpyOutsideCallSeq(
7290  (LoadOffset != 0)
7291  ? DAG.getObjectPtrOffset(dl, Arg, TypeSize::Fixed(LoadOffset))
7292  : Arg,
7293  DAG.getObjectPtrOffset(dl, StackPtr,
7294  TypeSize::Fixed(ByValVA.getLocMemOffset())),
7295  CallSeqStart, MemcpyFlags, DAG, dl);
7296  continue;
7297  }
7298 
7299  // Initialize the final register residue.
7300  // Any residue that occupies the final by-val arg register must be
7301  // left-justified on AIX. Loads must be a power-of-2 size and cannot be
7302  // larger than the ByValSize. For example: a 7 byte by-val arg requires 4,
7303  // 2 and 1 byte loads.
7304  const unsigned ResidueBytes = ByValSize % PtrByteSize;
7305  assert(ResidueBytes != 0 && LoadOffset + PtrByteSize > ByValSize &&
7306  "Unexpected register residue for by-value argument.");
7307  SDValue ResidueVal;
7308  for (unsigned Bytes = 0; Bytes != ResidueBytes;) {
7309  const unsigned N = PowerOf2Floor(ResidueBytes - Bytes);
7310  const MVT VT =
7311  N == 1 ? MVT::i8
7312  : ((N == 2) ? MVT::i16 : (N == 4 ? MVT::i32 : MVT::i64));
7313  SDValue Load = GetLoad(VT, LoadOffset);
7314  MemOpChains.push_back(Load.getValue(1));
7315  LoadOffset += N;
7316  Bytes += N;
7317 
7318  // By-val arguments are passed left-justfied in register.
7319  // Every load here needs to be shifted, otherwise a full register load
7320  // should have been used.
7321  assert(PtrVT.getSimpleVT().getSizeInBits() > (Bytes * 8) &&
7322  "Unexpected load emitted during handling of pass-by-value "
7323  "argument.");
7324  unsigned NumSHLBits = PtrVT.getSimpleVT().getSizeInBits() - (Bytes * 8);
7325  EVT ShiftAmountTy =
7326  getShiftAmountTy(Load->getValueType(0), DAG.getDataLayout());
7327  SDValue SHLAmt = DAG.getConstant(NumSHLBits, dl, ShiftAmountTy);
7328  SDValue ShiftedLoad =
7329  DAG.getNode(ISD::SHL, dl, Load.getValueType(), Load, SHLAmt);
7330  ResidueVal = ResidueVal ? DAG.getNode(ISD::OR, dl, PtrVT, ResidueVal,
7331  ShiftedLoad)
7332  : ShiftedLoad;
7333  }
7334 
7335  const CCValAssign &ByValVA = ArgLocs[I++];
7336  RegsToPass.push_back(std::make_pair(ByValVA.getLocReg(), ResidueVal));
7337  continue;
7338  }
7339 
7340  CCValAssign &VA = ArgLocs[I++];
7341  const MVT LocVT = VA.getLocVT();
7342  const MVT ValVT = VA.getValVT();
7343 
7344  switch (VA.getLocInfo()) {
7345  default:
7346  report_fatal_error("Unexpected argument extension type.");
7347  case CCValAssign::Full:
7348  break;
7349  case CCValAssign::ZExt:
7350  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7351  break;
7352  case CCValAssign::SExt:
7353  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7354  break;
7355  }
7356 
7357  if (VA.isRegLoc() && !VA.needsCustom()) {
7358  RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
7359  continue;
7360  }
7361 
7362  // Vector arguments passed to VarArg functions need custom handling when
7363  // they are passed (at least partially) in GPRs.
7364  if (VA.isMemLoc() && VA.needsCustom() && ValVT.isVector()) {
7365  assert(CFlags.IsVarArg && "Custom MemLocs only used for Vector args.");
7366  // Store value to its stack slot.
7367  SDValue PtrOff =
7368  DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7369  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7370  SDValue Store =
7371  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo());
7372  MemOpChains.push_back(Store);
7373  const unsigned OriginalValNo = VA.getValNo();
7374  // Then load the GPRs from the stack
7375  unsigned LoadOffset = 0;
7376  auto HandleCustomVecRegLoc = [&]() {
7377  assert(I != E && "Unexpected end of CCvalAssigns.");
7378  assert(ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7379  "Expected custom RegLoc.");
7380  CCValAssign RegVA = ArgLocs[I++];
7381  assert(RegVA.getValNo() == OriginalValNo &&
7382  "Custom MemLoc ValNo and custom RegLoc ValNo must match.");
7383  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, PtrOff,
7384  DAG.getConstant(LoadOffset, dl, PtrVT));
7385  SDValue Load = DAG.getLoad(PtrVT, dl, Store, Add, MachinePointerInfo());
7386  MemOpChains.push_back(Load.getValue(1));
7387  RegsToPass.push_back(std::make_pair(RegVA.getLocReg(), Load));
7388  LoadOffset += PtrByteSize;
7389  };
7390 
7391  // In 64-bit there will be exactly 2 custom RegLocs that follow, and in
7392  // in 32-bit there will be 2 custom RegLocs if we are passing in R9 and
7393  // R10.
7394  HandleCustomVecRegLoc();
7395  HandleCustomVecRegLoc();
7396 
7397  if (I != E && ArgLocs[I].isRegLoc() && ArgLocs[I].needsCustom() &&
7398  ArgLocs[I].getValNo() == OriginalValNo) {
7399  assert(!IsPPC64 &&
7400  "Only 2 custom RegLocs expected for 64-bit codegen.");
7401  HandleCustomVecRegLoc();
7402  HandleCustomVecRegLoc();
7403  }
7404 
7405  continue;
7406  }
7407 
7408  if (VA.isMemLoc()) {
7409  SDValue PtrOff =
7410  DAG.getConstant(VA.getLocMemOffset(), dl, StackPtr.getValueType());
7411  PtrOff = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7412  MemOpChains.push_back(
7413  DAG.getStore(Chain, dl, Arg, PtrOff, MachinePointerInfo()));
7414 
7415  continue;
7416  }
7417 
7418  if (!ValVT.isFloatingPoint())
7420  "Unexpected register handling for calling convention.");
7421 
7422  // Custom handling is used for GPR initializations for vararg float
7423  // arguments.
7424  assert(VA.isRegLoc() && VA.needsCustom() && CFlags.IsVarArg &&
7425  LocVT.isInteger() &&
7426  "Custom register handling only expected for VarArg.");
7427 
7428  SDValue ArgAsInt =
7430 
7431  if (Arg.getValueType().getStoreSize() == LocVT.getStoreSize())
7432  // f32 in 32-bit GPR
7433  // f64 in 64-bit GPR
7434  RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgAsInt));
7435  else if (Arg.getValueType().getFixedSizeInBits() <
7436  LocVT.getFixedSizeInBits())
7437  // f32 in 64-bit GPR.
7438  RegsToPass.push_back(std::make_pair(
7439  VA.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, LocVT)));
7440  else {
7441  // f64 in two 32-bit GPRs
7442  // The 2 GPRs are marked custom and expected to be adjacent in ArgLocs.
7443  assert(Arg.getValueType() == MVT::f64 && CFlags.IsVarArg && !IsPPC64 &&
7444  "Unexpected custom register for argument!");
7445  CCValAssign &GPR1 = VA;
7446  SDValue MSWAsI64 = DAG.getNode(ISD::SRL, dl, MVT::i64, ArgAsInt,
7447  DAG.getConstant(32, dl, MVT::i8));
7448  RegsToPass.push_back(std::make_pair(
7449  GPR1.getLocReg(), DAG.getZExtOrTrunc(MSWAsI64, dl, MVT::i32)));
7450 
7451  if (I != E) {
7452  // If only 1 GPR was available, there will only be one custom GPR and
7453  // the argument will also pass in memory.
7454  CCValAssign &PeekArg = ArgLocs[I];
7455  if (PeekArg.isRegLoc() && PeekArg.getValNo() == PeekArg.getValNo()) {
7456  assert(PeekArg.needsCustom() && "A second custom GPR is expected.");
7457  CCValAssign &GPR2 = ArgLocs[I++];
7458  RegsToPass.push_back(std::make_pair(
7459  GPR2.getLocReg(), DAG.getZExtOrTrunc(ArgAsInt, dl, MVT::i32)));
7460  }
7461  }
7462  }
7463  }
7464 
7465  if (!MemOpChains.empty())
7466  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
7467 
7468  // For indirect calls, we need to save the TOC base to the stack for
7469  // restoration after the call.
7470  if (CFlags.IsIndirect) {
7471  assert(!CFlags.IsTailCall && "Indirect tail-calls not supported.");
7472  const MCRegister TOCBaseReg = Subtarget.getTOCPointerRegister();
7473  const MCRegister StackPtrReg = Subtarget.getStackPointerRegister();
7474  const MVT PtrVT = Subtarget.isPPC64() ? MVT::i64 : MVT::i32;
7475  const unsigned TOCSaveOffset =
7476  Subtarget.getFrameLowering()->getTOCSaveOffset();
7477 
7478  setUsesTOCBasePtr(DAG);
7479  SDValue Val = DAG.getCopyFromReg(Chain, dl, TOCBaseReg, PtrVT);
7480  SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset, dl);
7481  SDValue StackPtr = DAG.getRegister(StackPtrReg, PtrVT);
7482  SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
7483  Chain = DAG.getStore(
7484  Val.getValue(1), dl, Val, AddPtr,
7485  MachinePointerInfo::getStack(DAG.getMachineFunction(), TOCSaveOffset));
7486  }
7487 
7488  // Build a sequence of copy-to-reg nodes chained together with token chain
7489  // and flag operands which copy the outgoing args into the appropriate regs.
7490  SDValue InFlag;
7491  for (auto Reg : RegsToPass) {
7492  Chain = DAG.getCopyToReg(Chain, dl, Reg.first, Reg.second, InFlag);
7493  InFlag = Chain.getValue(1);
7494  }
7495 
7496  const int SPDiff = 0;
7497  return FinishCall(CFlags, dl, DAG, RegsToPass, InFlag, Chain, CallSeqStart,
7498  Callee, SPDiff, NumBytes, Ins, InVals, CB);
7499 }
7500 
7501 bool
7502 PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
7503  MachineFunction &MF, bool isVarArg,
7504  const SmallVectorImpl<ISD::OutputArg> &Outs,
7505  LLVMContext &Context) const {
7507  CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
7508  return CCInfo.CheckReturn(
7509  Outs, (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7510  ? RetCC_PPC_Cold
7511  : RetCC_PPC);
7512 }
7513 
7514 SDValue
7515 PPCTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
7516  bool isVarArg,
7517  const SmallVectorImpl<ISD::OutputArg> &Outs,
7518  const SmallVectorImpl<SDValue> &OutVals,
7519  const SDLoc &dl, SelectionDAG &DAG) const {
7521  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
7522  *DAG.getContext());
7523  CCInfo.AnalyzeReturn(Outs,
7524  (Subtarget.isSVR4ABI() && CallConv == CallingConv::Cold)
7525  ? RetCC_PPC_Cold
7526  : RetCC_PPC);
7527 
7528  SDValue Flag;
7529  SmallVector<SDValue, 4> RetOps(1, Chain);
7530 
7531  // Copy the result values into the output registers.
7532  for (unsigned i = 0, RealResIdx = 0; i != RVLocs.size(); ++i, ++RealResIdx) {
7533  CCValAssign &VA = RVLocs[i];
7534  assert(VA.isRegLoc() && "Can only return in registers!");
7535 
7536  SDValue Arg = OutVals[RealResIdx];
7537 
7538  switch (VA.getLocInfo()) {
7539  default: llvm_unreachable("Unknown loc info!");
7540  case CCValAssign::Full: break;
7541  case CCValAssign::AExt:
7542  Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
7543  break;
7544  case CCValAssign::ZExt:
7545  Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
7546  break;
7547  case CCValAssign::SExt:
7548  Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
7549  break;
7550  }
7551  if (Subtarget.hasSPE() && VA.getLocVT() == MVT::f64) {
7552  bool isLittleEndian = Subtarget.isLittleEndian();
7553  // Legalize ret f64 -> ret 2 x i32.
7554  SDValue SVal =
7556  DAG.getIntPtrConstant(isLittleEndian ? 0 : 1, dl));
7557  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7558  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7559  SVal = DAG.getNode(PPCISD::EXTRACT_SPE, dl, MVT::i32, Arg,
7560  DAG.getIntPtrConstant(isLittleEndian ? 1 : 0, dl));
7561  Flag = Chain.getValue(1);
7562  VA = RVLocs[++i]; // skip ahead to next loc
7563  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), SVal, Flag);
7564  } else
7565  Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Arg, Flag);
7566  Flag = Chain.getValue(1);
7567  RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
7568  }
7569 
7570  RetOps[0] = Chain; // Update chain.
7571 
7572  // Add the flag if we have it.
7573  if (Flag.getNode())
7574  RetOps.push_back(Flag);
7575 
7576  return DAG.getNode(PPCISD::RET_FLAG, dl, MVT::Other, RetOps);
7577 }
7578 
7579 SDValue
7580 PPCTargetLowering::LowerGET_DYNAMIC_AREA_OFFSET(SDValue Op,
7581  SelectionDAG &DAG) const {
7582  SDLoc dl(Op);
7583 
7584  // Get the correct type for integers.
7585  EVT IntVT = Op.getValueType();
7586 
7587  // Get the inputs.
7588  SDValue Chain = Op.getOperand(0);
7589  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7590  // Build a DYNAREAOFFSET node.
7591  SDValue Ops[2] = {Chain, FPSIdx};
7592  SDVTList VTs = DAG.getVTList(IntVT);
7593  return DAG.getNode(PPCISD::DYNAREAOFFSET, dl, VTs, Ops);
7594 }
7595 
7596 SDValue PPCTargetLowering::LowerSTACKRESTORE(SDValue Op,
7597  SelectionDAG &DAG) const {
7598  // When we pop the dynamic allocation we need to restore the SP link.
7599  SDLoc dl(Op);
7600 
7601  // Get the correct type for pointers.
7602  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7603 
7604  // Construct the stack pointer operand.
7605  bool isPPC64 = Subtarget.isPPC64();
7606  unsigned SP = isPPC64 ? PPC::X1 : PPC::R1;
7607  SDValue StackPtr = DAG.getRegister(SP, PtrVT);
7608 
7609  // Get the operands for the STACKRESTORE.
7610  SDValue Chain = Op.getOperand(0);
7611  SDValue SaveSP = Op.getOperand(1);
7612 
7613  // Load the old link SP.
7614  SDValue LoadLinkSP =
7615  DAG.getLoad(PtrVT, dl, Chain, StackPtr, MachinePointerInfo());
7616 
7617  // Restore the stack pointer.
7618  Chain = DAG.getCopyToReg(LoadLinkSP.getValue(1), dl, SP, SaveSP);
7619 
7620  // Store the old link SP.
7621  return DAG.getStore(Chain, dl, LoadLinkSP, StackPtr, MachinePointerInfo());
7622 }
7623 
7624 SDValue PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG &DAG) const {
7625  MachineFunction &MF = DAG.getMachineFunction();
7626  bool isPPC64 = Subtarget.isPPC64();
7627  EVT PtrVT = getPointerTy(MF.getDataLayout());
7628 
7629  // Get current frame pointer save index. The users of this index will be
7630  // primarily DYNALLOC instructions.
7632  int RASI = FI->getReturnAddrSaveIndex();
7633 
7634  // If the frame pointer save index hasn't been defined yet.
7635  if (!RASI) {
7636  // Find out what the fix offset of the frame pointer save area.
7637  int LROffset = Subtarget.getFrameLowering()->getReturnSaveOffset();
7638  // Allocate the frame index for frame pointer save area.
7639  RASI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, LROffset, false);
7640  // Save the result.
7641  FI->setReturnAddrSaveIndex(RASI);
7642  }
7643  return DAG.getFrameIndex(RASI, PtrVT);
7644 }
7645 
7646 SDValue
7647 PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
7648  MachineFunction &MF = DAG.getMachineFunction();
7649  bool isPPC64 = Subtarget.isPPC64();
7650  EVT PtrVT = getPointerTy(MF.getDataLayout());
7651 
7652  // Get current frame pointer save index. The users of this index will be
7653  // primarily DYNALLOC instructions.
7655  int FPSI = FI->getFramePointerSaveIndex();
7656 
7657  // If the frame pointer save index hasn't been defined yet.
7658  if (!FPSI) {
7659  // Find out what the fix offset of the frame pointer save area.
7660  int FPOffset = Subtarget.getFrameLowering()->getFramePointerSaveOffset();
7661  // Allocate the frame index for frame pointer save area.
7662  FPSI = MF.getFrameInfo().CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
7663  // Save the result.
7664  FI->setFramePointerSaveIndex(FPSI);
7665  }
7666  return DAG.getFrameIndex(FPSI, PtrVT);
7667 }
7668 
7669 SDValue PPCTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
7670  SelectionDAG &DAG) const {
7671  MachineFunction &MF = DAG.getMachineFunction();
7672  // Get the inputs.
7673  SDValue Chain = Op.getOperand(0);
7674  SDValue Size = Op.getOperand(1);
7675  SDLoc dl(Op);
7676 
7677  // Get the correct type for pointers.
7678  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7679  // Negate the size.
7680  SDValue NegSize = DAG.getNode(ISD::SUB, dl, PtrVT,
7681  DAG.getConstant(0, dl, PtrVT), Size);
7682  // Construct a node for the frame pointer save index.
7683  SDValue FPSIdx = getFramePointerFrameIndex(DAG);
7684  SDValue Ops[3] = { Chain, NegSize, FPSIdx };
7685  SDVTList VTs = DAG.getVTList(PtrVT, MVT::Other);
7686  if (hasInlineStackProbe(MF))
7687  return DAG.getNode(PPCISD::PROBED_ALLOCA, dl, VTs, Ops);
7688  return DAG.getNode(PPCISD::DYNALLOC, dl, VTs, Ops);
7689 }
7690 
7691 SDValue PPCTargetLowering::LowerEH_DWARF_CFA(SDValue Op,
7692  SelectionDAG &DAG) const {
7693  MachineFunction &MF = DAG.getMachineFunction();
7694 
7695  bool isPPC64 = Subtarget.isPPC64();
7696  EVT PtrVT = getPointerTy(DAG.getDataLayout());
7697 
7698  int FI = MF.getFrameInfo().CreateFixedObject(isPPC64 ? 8 : 4, 0, false);
7699  return DAG.getFrameIndex(FI, PtrVT);
7700 }
7701 
7702 SDValue PPCTargetLowering::lowerEH_SJLJ_SETJMP(SDValue Op,
7703  SelectionDAG &DAG) const {
7704  SDLoc DL(Op);
7705  return DAG.getNode(PPCISD::EH_SJLJ_SETJMP, DL,
7707  Op.getOperand(0), Op.getOperand(1));
7708 }
7709 
7710 SDValue PPCTargetLowering::lowerEH_SJLJ_LONGJMP(SDValue Op,
7711  SelectionDAG &DAG) const {
7712  SDLoc DL(Op);
7714  Op.getOperand(0), Op.getOperand(1));
7715 }
7716 
7717 SDValue PPCTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
7718  if (Op.getValueType().isVector())
7719  return LowerVectorLoad(Op, DAG);
7720 
7721  assert(Op.getValueType() == MVT::i1 &&
7722  "Custom lowering only for i1 loads");
7723 
7724  // First, load 8 bits into 32 bits, then truncate to 1 bit.
7725 
7726  SDLoc dl(Op);
7727  LoadSDNode *LD = cast<LoadSDNode>(Op);
7728 
7729  SDValue Chain = LD->getChain();
7730  SDValue BasePtr = LD->getBasePtr();
7731  MachineMemOperand *MMO = LD->getMemOperand();
7732 
7733  SDValue NewLD =
7734  DAG.getExtLoad(ISD::EXTLOAD, dl, getPointerTy(DAG.getDataLayout()), Chain,
7735  BasePtr, MVT::i8, MMO);
7736  SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewLD);
7737 
7738  SDValue Ops[] = { Result, SDValue(NewLD.getNode(), 1) };
7739  return DAG.getMergeValues(Ops, dl);
7740 }
7741 
7742 SDValue PPCTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
7743  if (Op.getOperand(1).getValueType().isVector())
7744  return LowerVectorStore(Op, DAG);
7745 
7746  assert(Op.getOperand(1).getValueType() == MVT::i1 &&
7747  "Custom lowering only for i1 stores");
7748 
7749  // First, zero extend to 32 bits, then use a truncating store to 8 bits.
7750 
7751  SDLoc dl(Op);
7752  StoreSDNode *ST = cast<StoreSDNode>(Op);
7753 
7754  SDValue Chain = ST->getChain();
7755  SDValue BasePtr = ST->getBasePtr();
7756  SDValue Value = ST->getValue();
7757  MachineMemOperand *MMO = ST->getMemOperand();
7758 
7760  Value);
7761  return DAG.getTruncStore(Chain, dl, Value, BasePtr, MVT::i8, MMO);
7762 }
7763 
7764 // FIXME: Remove this once the ANDI glue bug is fixed:
7765 SDValue PPCTargetLowering::LowerTRUNCATE(SDValue Op, SelectionDAG &DAG) const {
7766  assert(Op.getValueType() == MVT::i1 &&
7767  "Custom lowering only for i1 results");
7768 
7769  SDLoc DL(Op);
7770  return DAG.getNode(PPCISD::ANDI_rec_1_GT_BIT, DL, MVT::i1, Op.getOperand(0));
7771 }
7772 
7773 SDValue PPCTargetLowering::LowerTRUNCATEVector(SDValue Op,
7774  SelectionDAG &DAG) const {
7775 
7776  // Implements a vector truncate that fits in a vector register as a shuffle.
7777  // We want to legalize vector truncates down to where the source fits in
7778  // a vector register (and target is therefore smaller than vector register
7779  // size). At that point legalization will try to custom lower the sub-legal
7780  // result and get here - where we can contain the truncate as a single target
7781  // operation.
7782 
7783  // For example a trunc <2 x i16> to <2 x i8> could be visualized as follows:
7784  // <MSB1|LSB1, MSB2|LSB2> to <LSB1, LSB2>
7785  //
7786  // We will implement it for big-endian ordering as this (where x denotes
7787  // undefined):
7788  // < MSB1|LSB1, MSB2|LSB2, uu, uu, uu, uu, uu, uu> to
7789  // < LSB1, LSB2, u, u, u, u, u, u, u, u, u, u, u, u, u, u>
7790  //
7791  // The same operation in little-endian ordering will be:
7792  // <uu, uu, uu, uu, uu, uu, LSB2|MSB2, LSB1|MSB1> to
7793  // <u, u, u, u, u, u, u, u, u, u, u, u, u, u, LSB2, LSB1>
7794 
7795  EVT TrgVT = Op.getValueType();
7796  assert(TrgVT.isVector() && "Vector type expected.");
7797  unsigned TrgNumElts = TrgVT.getVectorNumElements();
7798  EVT EltVT = TrgVT.getVectorElementType();
7799  if (!isOperationCustom(Op.getOpcode(), TrgVT) ||
7800  TrgVT.getSizeInBits() > 128 || !isPowerOf2_32(TrgNumElts) ||
7801  !isPowerOf2_32(EltVT.getSizeInBits()))
7802  return SDValue();
7803 
7804  SDValue N1 = Op.getOperand(0);
7805  EVT SrcVT = N1.getValueType();
7806  unsigned SrcSize = SrcVT.getSizeInBits();
7807  if (SrcSize > 256 ||
7808  !isPowerOf2_32(SrcVT.getVectorNumElements()) ||
7810  return SDValue();
7811  if (SrcSize == 256 && SrcVT.getVectorNumElements() < 2)
7812  return SDValue();
7813 
7814  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
7815  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
7816 
7817  SDLoc DL(Op);
7818  SDValue Op1, Op2;
7819  if (SrcSize == 256) {
7820  EVT VecIdxTy = getVectorIdxTy(DAG.getDataLayout());
7821  EVT SplitVT =
7823  unsigned SplitNumElts = SplitVT.getVectorNumElements();
7824  Op1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7825  DAG.getConstant(0, DL, VecIdxTy));
7826  Op2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, N1,
7827  DAG.getConstant(SplitNumElts, DL, VecIdxTy));
7828  }
7829  else {
7830  Op1 = SrcSize == 128 ? N1 : widenVec(DAG, N1, DL);
7831  Op2 = DAG.getUNDEF(WideVT);
7832  }
7833 
7834  // First list the elements we want to keep.
7835  unsigned SizeMult = SrcSize / TrgVT.getSizeInBits();
7836  SmallVector<int, 16> ShuffV;
7837  if (Subtarget.isLittleEndian())
7838  for (unsigned i = 0; i < TrgNumElts; ++i)
7839  ShuffV.push_back(i * SizeMult);
7840  else
7841  for (unsigned i = 1; i <= TrgNumElts; ++i)
7842  ShuffV.push_back(i * SizeMult - 1);
7843 
7844  // Populate the remaining elements with undefs.
7845  for (unsigned i = TrgNumElts; i < WideNumElts; ++i)
7846  // ShuffV.push_back(i + WideNumElts);
7847  ShuffV.push_back(WideNumElts + 1);
7848 
7849  Op1 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op1);
7850  Op2 = DAG.getNode(ISD::BITCAST, DL, WideVT, Op2);
7851  return DAG.getVectorShuffle(WideVT, DL, Op1, Op2, ShuffV);
7852 }
7853 
7854 /// LowerSELECT_CC - Lower floating point select_cc's into fsel instruction when
7855 /// possible.
7856 SDValue PPCTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
7857  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
7858  EVT ResVT = Op.getValueType();
7859  EVT CmpVT = Op.getOperand(0).getValueType();
7860  SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
7861  SDValue TV = Op.getOperand(2), FV = Op.getOperand(3);
7862  SDLoc dl(Op);
7863 
7864  // Without power9-vector, we don't have native instruction for f128 comparison.
7865  // Following transformation to libcall is needed for setcc:
7866  // select_cc lhs, rhs, tv, fv, cc -> select_cc (setcc cc, x, y), 0, tv, fv, NE
7867  if (!Subtarget.hasP9Vector() && CmpVT == MVT::f128) {
7868  SDValue Z = DAG.getSetCC(
7869  dl, getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), CmpVT),
7870  LHS, RHS, CC);
7871  SDValue Zero = DAG.getConstant(0, dl, Z.getValueType());
7872  return DAG.getSelectCC(dl, Z, Zero, TV, FV, ISD::SETNE);
7873  }
7874 
7875  // Not FP, or using SPE? Not a fsel.
7876  if (!CmpVT.isFloatingPoint() || !TV.getValueType().isFloatingPoint() ||
7877  Subtarget.hasSPE())
7878  return Op;
7879 
7880  SDNodeFlags Flags = Op.getNode()->getFlags();
7881 
7882  // We have xsmaxcdp/xsmincdp which are OK to emit even in the
7883  // presence of infinities.
7884  if (Subtarget.hasP9Vector() && LHS == TV && RHS == FV) {
7885  switch (CC) {
7886  default:
7887  break;
7888  case ISD::SETOGT:
7889  case ISD::SETGT:
7890  return DAG.getNode(PPCISD::XSMAXCDP, dl, Op.getValueType(), LHS, RHS);
7891  case ISD::SETOLT:
7892  case ISD::SETLT:
7893  return DAG.getNode(PPCISD::XSMINCDP, dl, Op.getValueType(), LHS, RHS);
7894  }
7895  }
7896 
7897  // We might be able to do better than this under some circumstances, but in
7898  // general, fsel-based lowering of select is a finite-math-only optimization.
7899  // For more information, see section F.3 of the 2.06 ISA specification.
7900  // With ISA 3.0
7901  if ((!DAG.getTarget().Options.NoInfsFPMath && !Flags.hasNoInfs()) ||
7902  (!DAG.getTarget().Options.NoNaNsFPMath && !Flags.hasNoNaNs()))
7903  return Op;
7904 
7905  // If the RHS of the comparison is a 0.0, we don't need to do the
7906  // subtraction at all.
7907  SDValue Sel1;
7908  if (isFloatingPointZero(RHS))
7909  switch (CC) {
7910  default: break; // SETUO etc aren't handled by fsel.
7911  case ISD::SETNE:
7912  std::swap(TV, FV);
7914  case ISD::SETEQ:
7915  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7916  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7917  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7918  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7919  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7920  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7921  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), Sel1, FV);
7922  case ISD::SETULT:
7923  case ISD::SETLT:
7924  std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7926  case ISD::SETOGE:
7927  case ISD::SETGE:
7928  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7929  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7930  return DAG.getNode(PPCISD::FSEL, dl, ResVT, LHS, TV, FV);
7931  case ISD::SETUGT:
7932  case ISD::SETGT:
7933  std::swap(TV, FV); // fsel is natively setge, swap operands for setlt
7935  case ISD::SETOLE:
7936  case ISD::SETLE:
7937  if (LHS.getValueType() == MVT::f32) // Comparison is always 64-bits
7938  LHS = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, LHS);
7939  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7940  DAG.getNode(ISD::FNEG, dl, MVT::f64, LHS), TV, FV);
7941  }
7942 
7943  SDValue Cmp;
7944  switch (CC) {
7945  default: break; // SETUO etc aren't handled by fsel.
7946  case ISD::SETNE:
7947  std::swap(TV, FV);
7949  case ISD::SETEQ:
7950  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7951  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7952  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7953  Sel1 = DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7954  if (Sel1.getValueType() == MVT::f32) // Comparison is always 64-bits
7955  Sel1 = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Sel1);
7956  return DAG.getNode(PPCISD::FSEL, dl, ResVT,
7957  DAG.getNode(ISD::FNEG, dl, MVT::f64, Cmp), Sel1, FV);
7958  case ISD::SETULT:
7959  case ISD::SETLT:
7960  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7961  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7962  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7963  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7964  case ISD::SETOGE:
7965  case ISD::SETGE:
7966  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, LHS, RHS, Flags);
7967  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7968  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7969  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7970  case ISD::SETUGT:
7971  case ISD::SETGT:
7972  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7973  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7974  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7975  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, FV, TV);
7976  case ISD::SETOLE:
7977  case ISD::SETLE:
7978  Cmp = DAG.getNode(ISD::FSUB, dl, CmpVT, RHS, LHS, Flags);
7979  if (Cmp.getValueType() == MVT::f32) // Comparison is always 64-bits
7980  Cmp = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Cmp);
7981  return DAG.getNode(PPCISD::FSEL, dl, ResVT, Cmp, TV, FV);
7982  }
7983  return Op;
7984 }
7985 
7986 static unsigned getPPCStrictOpcode(unsigned Opc) {
7987  switch (Opc) {
7988  default:
7989  llvm_unreachable("No strict version of this opcode!");
7990  case PPCISD::FCTIDZ:
7991  return PPCISD::STRICT_FCTIDZ;
7992  case PPCISD::FCTIWZ:
7993  return PPCISD::STRICT_FCTIWZ;
7994  case PPCISD::FCTIDUZ:
7995  return PPCISD::STRICT_FCTIDUZ;
7996  case PPCISD::FCTIWUZ:
7997  return PPCISD::STRICT_FCTIWUZ;
7998  case PPCISD::FCFID:
7999  return PPCISD::STRICT_FCFID;
8000  case PPCISD::FCFIDU:
8001  return PPCISD::STRICT_FCFIDU;
8002  case PPCISD::FCFIDS:
8003  return PPCISD::STRICT_FCFIDS;
8004  case PPCISD::FCFIDUS:
8005  return PPCISD::STRICT_FCFIDUS;
8006  }
8007 }
8008 
8010  const PPCSubtarget &Subtarget) {
8011  SDLoc dl(Op);
8012  bool IsStrict = Op->isStrictFPOpcode();
8013  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8014  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8015 
8016  // TODO: Any other flags to propagate?
8017  SDNodeFlags Flags;
8018  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8019 
8020  // For strict nodes, source is the second operand.
8021  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8022  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
8023  assert(Src.getValueType().isFloatingPoint());
8024  if (Src.getValueType() == MVT::f32) {
8025  if (IsStrict) {
8026  Src =
8028  DAG.getVTList(MVT::f64, MVT::Other), {Chain, Src}, Flags);
8029  Chain = Src.getValue(1);
8030  } else
8031  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
8032  }
8033  SDValue Conv;
8034  unsigned Opc = ISD::DELETED_NODE;
8035  switch (Op.getSimpleValueType().SimpleTy) {
8036  default: llvm_unreachable("Unhandled FP_TO_INT type in custom expander!");
8037  case MVT::i32:
8038  Opc = IsSigned ? PPCISD::FCTIWZ
8039  : (Subtarget.hasFPCVT() ? PPCISD::FCTIWUZ : PPCISD::FCTIDZ);
8040  break;
8041  case MVT::i64:
8042  assert((IsSigned || Subtarget.hasFPCVT()) &&
8043  "i64 FP_TO_UINT is supported only with FPCVT");
8044  Opc = IsSigned ? PPCISD::FCTIDZ : PPCISD::FCTIDUZ;
8045  }
8046  if (IsStrict) {
8047  Opc = getPPCStrictOpcode(Opc);
8048  Conv = DAG.getNode(Opc, dl, DAG.getVTList(MVT::f64, MVT::Other),
8049  {Chain, Src}, Flags);
8050  } else {
8051  Conv = DAG.getNode(Opc, dl, MVT::f64, Src);
8052  }
8053  return Conv;
8054 }
8055 
8056 void PPCTargetLowering::LowerFP_TO_INTForReuse(SDValue Op, ReuseLoadInfo &RLI,
8057  SelectionDAG &DAG,
8058  const SDLoc &dl) const {
8059  SDValue Tmp = convertFPToInt(Op, DAG, Subtarget);
8060  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8061  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8062  bool IsStrict = Op->isStrictFPOpcode();
8063 
8064  // Convert the FP value to an int value through memory.
8065  bool i32Stack = Op.getValueType() == MVT::i32 && Subtarget.hasSTFIWX() &&
8066  (IsSigned || Subtarget.hasFPCVT());
8067  SDValue FIPtr = DAG.CreateStackTemporary(i32Stack ? MVT::i32 : MVT::f64);
8068  int FI = cast<FrameIndexSDNode>(FIPtr)->getIndex();
8069  MachinePointerInfo MPI =
8071 
8072  // Emit a store to the stack slot.
8073  SDValue Chain = IsStrict ? Tmp.getValue(1) : DAG.getEntryNode();
8074  Align Alignment(DAG.getEVTAlign(Tmp.getValueType()));
8075  if (i32Stack) {
8076  MachineFunction &MF = DAG.getMachineFunction();
8077  Alignment = Align(4);
8078  MachineMemOperand *MMO =
8079  MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Alignment);
8080  SDValue Ops[] = { Chain, Tmp, FIPtr };
8081  Chain = DAG.getMemIntrinsicNode(PPCISD::STFIWX, dl,
8082  DAG.getVTList(MVT::Other), Ops, MVT::i32, MMO);
8083  } else
8084  Chain = DAG.getStore(Chain, dl, Tmp, FIPtr, MPI, Alignment);
8085 
8086  // Result is a load from the stack slot. If loading 4 bytes, make sure to
8087  // add in a bias on big endian.
8088  if (Op.getValueType() == MVT::i32 && !i32Stack) {
8089  FIPtr = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr,
8090  DAG.getConstant(4, dl, FIPtr.getValueType()));
8091  MPI = MPI.getWithOffset(Subtarget.isLittleEndian() ? 0 : 4);
8092  }
8093 
8094  RLI.Chain = Chain;
8095  RLI.Ptr = FIPtr;
8096  RLI.MPI = MPI;
8097  RLI.Alignment = Alignment;
8098 }
8099 
8100 /// Custom lowers floating point to integer conversions to use
8101 /// the direct move instructions available in ISA 2.07 to avoid the
8102 /// need for load/store combinations.
8103 SDValue PPCTargetLowering::LowerFP_TO_INTDirectMove(SDValue Op,
8104  SelectionDAG &DAG,
8105  const SDLoc &dl) const {
8106  SDValue Conv = convertFPToInt(Op, DAG, Subtarget);
8107  SDValue Mov = DAG.getNode(PPCISD::MFVSR, dl, Op.getValueType(), Conv);
8108  if (Op->isStrictFPOpcode())
8109  return DAG.getMergeValues({Mov, Conv.getValue(1)}, dl);
8110  else
8111  return Mov;
8112 }
8113 
8114 SDValue PPCTargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG,
8115  const SDLoc &dl) const {
8116  bool IsStrict = Op->isStrictFPOpcode();
8117  bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
8118  Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
8119  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8120  EVT SrcVT = Src.getValueType();
8121  EVT DstVT = Op.getValueType();
8122 
8123  // FP to INT conversions are legal for f128.
8124  if (SrcVT == MVT::f128)
8125  return Subtarget.hasP9Vector() ? Op : SDValue();
8126 
8127  // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
8128  // PPC (the libcall is not available).
8129  if (SrcVT == MVT::ppcf128) {
8130  if (DstVT == MVT::i32) {
8131  // TODO: Conservatively pass only nofpexcept flag here. Need to check and
8132  // set other fast-math flags to FP operations in both strict and
8133  // non-strict cases. (FP_TO_SINT, FSUB)
8134  SDNodeFlags Flags;
8135  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8136 
8137  if (IsSigned) {
8139  DAG.getIntPtrConstant(0, dl));
8141  DAG.getIntPtrConstant(1, dl));
8142 
8143  // Add the two halves of the long double in round-to-zero mode, and use
8144  // a smaller FP_TO_SINT.
8145  if (IsStrict) {
8146  SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
8148  {Op.getOperand(0), Lo, Hi}, Flags);
8149  return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8151  {Res.getValue(1), Res}, Flags);
8152  } else {
8153  SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
8154  return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
8155  }
8156  } else {
8157  const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
8158  APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
8159  SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
8160  SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
8161  if (IsStrict) {
8162  // Sel = Src < 0x80000000
8163  // FltOfs = select Sel, 0.0, 0x80000000
8164  // IntOfs = select Sel, 0, 0x80000000
8165  // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
8166  SDValue Chain = Op.getOperand(0);
8167  EVT SetCCVT =
8168  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
8169  EVT DstSetCCVT =
8170  getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
8171  SDValue Sel = DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT,
8172  Chain, true);
8173  Chain = Sel.getValue(1);
8174 
8175  SDValue FltOfs = DAG.getSelect(
8176  dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
8177  Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
8178 
8179  SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
8180  DAG.getVTList(SrcVT, MVT::Other),
8181  {Chain, Src, FltOfs}, Flags);
8182  Chain = Val.getValue(1);
8183  SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
8184  DAG.getVTList(DstVT, MVT::Other),
8185  {Chain, Val}, Flags);
8186  Chain = SInt.getValue(1);
8187  SDValue IntOfs = DAG.getSelect(
8188  dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
8189  SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
8190  return DAG.getMergeValues({Result, Chain}, dl);
8191  } else {
8192  // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
8193  // FIXME: generated code sucks.
8194  SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
8195  True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
8196  True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
8197  SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
8198  return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
8199  }
8200  }
8201  }
8202 
8203  return SDValue();
8204  }
8205 
8206  if (Subtarget.hasDirectMove() && Subtarget.isPPC64())
8207  return LowerFP_TO_INTDirectMove(Op, DAG, dl);
8208 
8209  ReuseLoadInfo RLI;
8210  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8211 
8212  return DAG.getLoad(Op.getValueType(), dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8213  RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8214 }
8215 
8216 // We're trying to insert a regular store, S, and then a load, L. If the
8217 // incoming value, O, is a load, we might just be able to have our load use the
8218 // address used by O. However, we don't know if anything else will store to
8219 // that address before we can load from it. To prevent this situation, we need
8220 // to insert our load, L, into the chain as a peer of O. To do this, we give L
8221 // the same chain operand as O, we create a token factor from the chain results
8222 // of O and L, and we replace all uses of O's chain result with that token
8223 // factor (see spliceIntoChain below for this last part).
8224 bool PPCTargetLowering::canReuseLoadAddress(SDValue Op, EVT MemVT,
8225  ReuseLoadInfo &RLI,
8226  SelectionDAG &DAG,
8227  ISD::LoadExtType ET) const {
8228  // Conservatively skip reusing for constrained FP nodes.
8229  if (Op->isStrictFPOpcode())
8230  return false;
8231 
8232  SDLoc dl(Op);
8233  bool ValidFPToUint = Op.getOpcode() == ISD::FP_TO_UINT &&
8234  (Subtarget.hasFPCVT() || Op.getValueType() == MVT::i32);
8235  if (ET == ISD::NON_EXTLOAD &&
8236  (ValidFPToUint || Op.getOpcode() == ISD::FP_TO_SINT) &&
8237  isOperationLegalOrCustom(Op.getOpcode(),
8238  Op.getOperand(0).getValueType())) {
8239 
8240  LowerFP_TO_INTForReuse(Op, RLI, DAG, dl);
8241  return true;
8242  }
8243 
8244  LoadSDNode *LD = dyn_cast<LoadSDNode>(Op);
8245  if (!LD || LD->getExtensionType() != ET || LD->isVolatile() ||
8246  LD->isNonTemporal())
8247  return false;
8248  if (LD->getMemoryVT() != MemVT)
8249  return false;
8250 
8251  // If the result of the load is an illegal type, then we can't build a
8252  // valid chain for reuse since the legalised loads and token factor node that
8253  // ties the legalised loads together uses a different output chain then the
8254  // illegal load.
8255  if (!isTypeLegal(LD->getValueType(0)))
8256  return false;
8257 
8258  RLI.Ptr = LD->getBasePtr();
8259  if (LD->isIndexed() && !LD->getOffset().isUndef()) {
8260  assert(LD->getAddressingMode() == ISD::PRE_INC &&
8261  "Non-pre-inc AM on PPC?");
8262  RLI.Ptr = DAG.getNode(ISD::ADD, dl, RLI.Ptr.getValueType(), RLI.Ptr,
8263  LD->getOffset());
8264  }
8265 
8266  RLI.Chain = LD->getChain();
8267  RLI.MPI = LD->getPointerInfo();
8268  RLI.IsDereferenceable = LD->isDereferenceable();
8269  RLI.IsInvariant = LD->isInvariant();
8270  RLI.Alignment = LD->getAlign();
8271  RLI.AAInfo = LD->getAAInfo();
8272  RLI.Ranges = LD->getRanges();
8273 
8274  RLI.ResChain = SDValue(LD, LD->isIndexed() ? 2 : 1);
8275  return true;
8276 }
8277 
8278 // Given the head of the old chain, ResChain, insert a token factor containing
8279 // it and NewResChain, and make users of ResChain now be users of that token
8280 // factor.
8281 // TODO: Remove and use DAG::makeEquivalentMemoryOrdering() instead.
8282 void PPCTargetLowering::spliceIntoChain(SDValue ResChain,
8283  SDValue NewResChain,
8284  SelectionDAG &DAG) const {
8285  if (!ResChain)
8286  return;
8287 
8288  SDLoc dl(NewResChain);
8289 
8291  NewResChain, DAG.getUNDEF(MVT::Other));
8292  assert(TF.getNode() != NewResChain.getNode() &&
8293  "A new TF really is required here");
8294 
8295  DAG.ReplaceAllUsesOfValueWith(ResChain, TF);
8296  DAG.UpdateNodeOperands(TF.getNode(), ResChain, NewResChain);
8297 }
8298 
8299 /// Analyze profitability of direct move
8300 /// prefer float load to int load plus direct move
8301 /// when there is no integer use of int load
8302 bool PPCTargetLowering::directMoveIsProfitable(const SDValue &Op) const {
8303  SDNode *Origin = Op.getOperand(0).getNode();
8304  if (Origin->getOpcode() != ISD::LOAD)
8305  return true;
8306 
8307  // If there is no LXSIBZX/LXSIHZX, like Power8,
8308  // prefer direct move if the memory size is 1 or 2 bytes.
8309  MachineMemOperand *MMO = cast<LoadSDNode>(Origin)->getMemOperand();
8310  if (!Subtarget.hasP9Vector() && MMO->getSize() <= 2)
8311  return true;
8312 
8313  for (SDNode::use_iterator UI = Origin->use_begin(),
8314  UE = Origin->use_end();
8315  UI != UE; ++UI) {
8316 
8317  // Only look at the users of the loaded value.
8318  if (UI.getUse().get().getResNo() != 0)
8319  continue;
8320 
8321  if (UI->getOpcode() != ISD::SINT_TO_FP &&
8322  UI->getOpcode() != ISD::UINT_TO_FP &&
8323  UI->getOpcode() != ISD::STRICT_SINT_TO_FP &&
8324  UI->getOpcode() != ISD::STRICT_UINT_TO_FP)
8325  return true;
8326  }
8327 
8328  return false;
8329 }
8330 
8332  const PPCSubtarget &Subtarget,
8333  SDValue Chain = SDValue()) {
8334  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8335  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8336  SDLoc dl(Op);
8337 
8338  // TODO: Any other flags to propagate?
8339  SDNodeFlags Flags;
8340  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8341 
8342  // If we have FCFIDS, then use it when converting to single-precision.
8343  // Otherwise, convert to double-precision and then round.
8344  bool IsSingle = Op.getValueType() == MVT::f32 && Subtarget.hasFPCVT();
8345  unsigned ConvOpc = IsSingle ? (IsSigned ? PPCISD::FCFIDS : PPCISD::FCFIDUS)
8346  : (IsSigned ? PPCISD::FCFID : PPCISD::FCFIDU);
8347  EVT ConvTy = IsSingle ? MVT::f32 : MVT::f64;
8348  if (Op->isStrictFPOpcode()) {
8349  if (!Chain)
8350  Chain = Op.getOperand(0);
8351  return DAG.getNode(getPPCStrictOpcode(ConvOpc), dl,
8352  DAG.getVTList(ConvTy, MVT::Other), {Chain, Src}, Flags);
8353  } else
8354  return DAG.getNode(ConvOpc, dl, ConvTy, Src);
8355 }
8356 
8357 /// Custom lowers integer to floating point conversions to use
8358 /// the direct move instructions available in ISA 2.07 to avoid the
8359 /// need for load/store combinations.
8360 SDValue PPCTargetLowering::LowerINT_TO_FPDirectMove(SDValue Op,
8361  SelectionDAG &DAG,
8362  const SDLoc &dl) const {
8363  assert((Op.getValueType() == MVT::f32 ||
8364  Op.getValueType() == MVT::f64) &&
8365  "Invalid floating point type as target of conversion");
8366  assert(Subtarget.hasFPCVT() &&
8367  "Int to FP conversions with direct moves require FPCVT");
8368  SDValue Src = Op.getOperand(Op->isStrictFPOpcode() ? 1 : 0);
8369  bool WordInt = Src.getSimpleValueType().SimpleTy == MVT::i32;
8370  bool Signed = Op.getOpcode() == ISD::SINT_TO_FP ||
8371  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8372  unsigned MovOpc = (WordInt && !Signed) ? PPCISD::MTVSRZ : PPCISD::MTVSRA;
8373  SDValue Mov = DAG.getNode(MovOpc, dl, MVT::f64, Src);
8374  return convertIntToFP(Op, Mov, DAG, Subtarget);
8375 }
8376 
8377 static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl) {
8378 
8379  EVT VecVT = Vec.getValueType();
8380  assert(VecVT.isVector() && "Expected a vector type.");
8381  assert(VecVT.getSizeInBits() < 128 && "Vector is already full width.");
8382 
8383  EVT EltVT = VecVT.getVectorElementType();
8384  unsigned WideNumElts = 128 / EltVT.getSizeInBits();
8385  EVT WideVT = EVT::getVectorVT(*DAG.getContext(), EltVT, WideNumElts);
8386 
8387  unsigned NumConcat = WideNumElts / VecVT.getVectorNumElements();
8388  SmallVector<SDValue, 16> Ops(NumConcat);
8389  Ops[0] = Vec;
8390  SDValue UndefVec = DAG.getUNDEF(VecVT);
8391  for (unsigned i = 1; i < NumConcat; ++i)
8392  Ops[i] = UndefVec;
8393 
8394  return DAG.getNode(ISD::CONCAT_VECTORS, dl, WideVT, Ops);
8395 }
8396 
8397 SDValue PPCTargetLowering::LowerINT_TO_FPVector(SDValue Op, SelectionDAG &DAG,
8398  const SDLoc &dl) const {
8399  bool IsStrict = Op->isStrictFPOpcode();
8400  unsigned Opc = Op.getOpcode();
8401  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8402  assert((Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP ||
8403  Opc == ISD::STRICT_UINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP) &&
8404  "Unexpected conversion type");
8405  assert((Op.getValueType() == MVT::v2f64 || Op.getValueType() == MVT::v4f32) &&
8406  "Supports conversions to v2f64/v4f32 only.");
8407 
8408  // TODO: Any other flags to propagate?
8409  SDNodeFlags Flags;
8410  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8411 
8412  bool SignedConv = Opc == ISD::SINT_TO_FP || Opc == ISD::STRICT_SINT_TO_FP;
8413  bool FourEltRes = Op.getValueType() == MVT::v4f32;
8414 
8415  SDValue Wide = widenVec(DAG, Src, dl);
8416  EVT WideVT = Wide.getValueType();
8417  unsigned WideNumElts = WideVT.getVectorNumElements();
8418  MVT IntermediateVT = FourEltRes ? MVT::v4i32 : MVT::v2i64;
8419 
8420  SmallVector<int, 16> ShuffV;
8421  for (unsigned i = 0; i < WideNumElts; ++i)
8422  ShuffV.push_back(i + WideNumElts);
8423 
8424  int Stride = FourEltRes ? WideNumElts / 4 : WideNumElts / 2;
8425  int SaveElts = FourEltRes ? 4 : 2;
8426  if (Subtarget.isLittleEndian())
8427  for (int i = 0; i < SaveElts; i++)
8428  ShuffV[i * Stride] = i;
8429  else
8430  for (int i = 1; i <= SaveElts; i++)
8431  ShuffV[i * Stride - 1] = i - 1;
8432 
8433  SDValue ShuffleSrc2 =
8434  SignedConv ? DAG.getUNDEF(WideVT) : DAG.getConstant(0, dl, WideVT);
8435  SDValue Arrange = DAG.getVectorShuffle(WideVT, dl, Wide, ShuffleSrc2, ShuffV);
8436 
8437  SDValue Extend;
8438  if (SignedConv) {
8439  Arrange = DAG.getBitcast(IntermediateVT, Arrange);
8440  EVT ExtVT = Src.getValueType();
8441  if (Subtarget.hasP9Altivec())
8442  ExtVT = EVT::getVectorVT(*DAG.getContext(), WideVT.getVectorElementType(),
8443  IntermediateVT.getVectorNumElements());
8444 
8445  Extend = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, IntermediateVT, Arrange,
8446  DAG.getValueType(ExtVT));
8447  } else
8448  Extend = DAG.getNode(ISD::BITCAST, dl, IntermediateVT, Arrange);
8449 
8450  if (IsStrict)
8451  return DAG.getNode(Opc, dl, DAG.getVTList(Op.getValueType(), MVT::Other),
8452  {Op.getOperand(0), Extend}, Flags);
8453 
8454  return DAG.getNode(Opc, dl, Op.getValueType(), Extend);
8455 }
8456 
8457 SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op,
8458  SelectionDAG &DAG) const {
8459  SDLoc dl(Op);
8460  bool IsSigned = Op.getOpcode() == ISD::SINT_TO_FP ||
8461  Op.getOpcode() == ISD::STRICT_SINT_TO_FP;
8462  bool IsStrict = Op->isStrictFPOpcode();
8463  SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
8464  SDValue Chain = IsStrict ? Op.getOperand(0) : DAG.getEntryNode();
8465 
8466  // TODO: Any other flags to propagate?
8467  SDNodeFlags Flags;
8468  Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
8469 
8470  EVT InVT = Src.getValueType();
8471  EVT OutVT = Op.getValueType();
8472  if (OutVT.isVector() && OutVT.isFloatingPoint() &&
8473  isOperationCustom(Op.getOpcode(), InVT))
8474  return LowerINT_TO_FPVector(Op, DAG, dl);
8475 
8476  // Conversions to f128 are legal.
8477  if (Op.getValueType() == MVT::f128)
8478  return Subtarget.hasP9Vector() ? Op : SDValue();
8479 
8480  // Don't handle ppc_fp128 here; let it be lowered to a libcall.
8481  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
8482  return SDValue();
8483 
8484  if (Src.getValueType() == MVT::i1) {
8485  SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src,
8486  DAG.getConstantFP(1.0, dl, Op.getValueType()),
8487  DAG.getConstantFP(0.0, dl, Op.getValueType()));
8488  if (IsStrict)
8489  return DAG.getMergeValues({Sel, Chain}, dl);
8490  else
8491  return Sel;
8492  }
8493 
8494  // If we have direct moves, we can do all the conversion, skip the store/load
8495  // however, without FPCVT we can't do most conversions.
8496  if (Subtarget.hasDirectMove() && directMoveIsProfitable(Op) &&
8497  Subtarget.isPPC64() && Subtarget.hasFPCVT())
8498  return LowerINT_TO_FPDirectMove(Op, DAG, dl);
8499 
8500  assert((IsSigned || Subtarget.hasFPCVT()) &&
8501  "UINT_TO_FP is supported only with FPCVT");
8502 
8503  if (Src.getValueType() == MVT::i64) {
8504  SDValue SINT = Src;
8505  // When converting to single-precision, we actually need to convert
8506  // to double-precision first and then round to single-precision.
8507  // To avoid double-rounding effects during that operation, we have
8508  // to prepare the input operand. Bits that might be truncated when
8509  // converting to double-precision are replaced by a bit that won't
8510  // be lost at this stage, but is below the single-precision rounding
8511  // position.
8512  //
8513  // However, if -enable-unsafe-fp-math is in effect, accept double
8514  // rounding to avoid the extra overhead.
8515  if (Op.getValueType() == MVT::f32 &&
8516  !Subtarget.hasFPCVT() &&
8517  !DAG.getTarget().Options.UnsafeFPMath) {
8518 
8519  // Twiddle input to make sure the low 11 bits are zero. (If this
8520  // is the case, we are guaranteed the value will fit into the 53 bit
8521  // mantissa of an IEEE double-precision value without rounding.)
8522  // If any of those low 11 bits were not zero originally, make sure
8523  // bit 12 (value 2048) is set instead, so that the final rounding
8524  // to single-precision gets the correct result.
8525  SDValue Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8526  SINT, DAG.getConstant(2047, dl, MVT::i64));
8527  Round = DAG.getNode(ISD::ADD, dl, MVT::i64,
8528  Round, DAG.getConstant(2047, dl, MVT::i64));
8529  Round = DAG.getNode(ISD::OR, dl, MVT::i64, Round, SINT);
8530  Round = DAG.getNode(ISD::AND, dl, MVT::i64,
8531  Round, DAG.getConstant(-2048, dl, MVT::i64));
8532 
8533  // However, we cannot use that value unconditionally: if the magnitude
8534  // of the input value is small, the bit-twiddling we did above might
8535  // end up visibly changing the output. Fortunately, in that case, we
8536  // don't need to twiddle bits since the original input will convert
8537  // exactly to double-precision floating-point already. Therefore,
8538  // construct a conditional to use the original value if the top 11
8539  // bits are all sign-bit copies, and use the rounded value computed
8540  // above otherwise.
8541  SDValue Cond = DAG.getNode(ISD::SRA, dl, MVT::i64,
8542  SINT, DAG.getConstant(53, dl, MVT::i32));
8543  Cond = DAG.getNode(ISD::ADD, dl, MVT::i64,
8544  Cond, DAG.getConstant(1, dl, MVT::i64));
8545  Cond = DAG.getSetCC(
8546  dl,
8548  Cond, DAG.getConstant(1, dl, MVT::i64), ISD::SETUGT);
8549 
8550  SINT = DAG.getNode(ISD::SELECT, dl, MVT::i64, Cond, Round, SINT);
8551  }
8552 
8553  ReuseLoadInfo RLI;
8554  SDValue Bits;
8555 
8556  MachineFunction &MF = DAG.getMachineFunction();
8557  if (canReuseLoadAddress(SINT, MVT::i64, RLI, DAG)) {
8558  Bits = DAG.getLoad(MVT::f64, dl, RLI.Chain, RLI.Ptr, RLI.MPI,
8559  RLI.Alignment, RLI.MMOFlags(), RLI.AAInfo, RLI.Ranges);
8560  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8561  } else if (Subtarget.hasLFIWAX() &&
8562  canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::SEXTLOAD)) {
8563  MachineMemOperand *MMO =
8565  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8566  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8569  Ops, MVT::i32, MMO);
8570  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8571  } else if (Subtarget.hasFPCVT() &&
8572  canReuseLoadAddress(SINT, MVT::i32, RLI, DAG, ISD::ZEXTLOAD)) {
8573  MachineMemOperand *MMO =
8575  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8576  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8579  Ops, MVT::i32, MMO);
8580  spliceIntoChain(RLI.ResChain, Bits.getValue(1), DAG);
8581  } else if (((Subtarget.hasLFIWAX() &&
8582  SINT.getOpcode() == ISD::SIGN_EXTEND) ||
8583  (Subtarget.hasFPCVT() &&
8584  SINT.getOpcode() == ISD::ZERO_EXTEND)) &&
8585  SINT.getOperand(0).getValueType() == MVT::i32) {
8586  MachineFrameInfo &MFI = MF.getFrameInfo();
8587  EVT PtrVT = getPointerTy(DAG.getDataLayout());
8588 
8589  int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8590  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8591 
8592  SDValue Store = DAG.getStore(Chain, dl, SINT.getOperand(0), FIdx,
8594  DAG.getMachineFunction(), FrameIdx));
8595  Chain = Store;
8596 
8597  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8598  "Expected an i32 store");
8599 
8600  RLI.Ptr = FIdx;
8601  RLI.Chain = Chain;
8602  RLI.MPI =
8604  RLI.Alignment = Align(4);
8605 
8606  MachineMemOperand *MMO =
8608  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8609  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8612  dl, DAG.getVTList(MVT::f64, MVT::Other),
8613  Ops, MVT::i32, MMO);
8614  Chain = Bits.getValue(1);
8615  } else
8616  Bits = DAG.getNode(ISD::BITCAST, dl, MVT::f64, SINT);
8617 
8618  SDValue FP = convertIntToFP(Op, Bits, DAG, Subtarget, Chain);
8619  if (IsStrict)
8620  Chain = FP.getValue(1);
8621 
8622  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8623  if (IsStrict)
8624  FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8626  {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8627  else
8628  FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8629  DAG.getIntPtrConstant(0, dl));
8630  }
8631  return FP;
8632  }
8633 
8634  assert(Src.getValueType() == MVT::i32 &&
8635  "Unhandled INT_TO_FP type in custom expander!");
8636  // Since we only generate this in 64-bit mode, we can take advantage of
8637  // 64-bit registers. In particular, sign extend the input value into the
8638  // 64-bit register with extsw, store the WHOLE 64-bit value into the stack
8639  // then lfd it and fcfid it.
8640  MachineFunction &MF = DAG.getMachineFunction();
8641  MachineFrameInfo &MFI = MF.getFrameInfo();
8642  EVT PtrVT = getPointerTy(MF.getDataLayout());
8643 
8644  SDValue Ld;
8645  if (Subtarget.hasLFIWAX() || Subtarget.hasFPCVT()) {
8646  ReuseLoadInfo RLI;
8647  bool ReusingLoad;
8648  if (!(ReusingLoad = canReuseLoadAddress(Src, MVT::i32, RLI, DAG))) {
8649  int FrameIdx = MFI.CreateStackObject(4, Align(4), false);
8650  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8651 
8652  SDValue Store = DAG.getStore(Chain, dl, Src, FIdx,
8654  DAG.getMachineFunction(), FrameIdx));
8655  Chain = Store;
8656 
8657  assert(cast<StoreSDNode>(Store)->getMemoryVT() == MVT::i32 &&
8658  "Expected an i32 store");
8659 
8660  RLI.Ptr = FIdx;
8661  RLI.Chain = Chain;
8662  RLI.MPI =
8664  RLI.Alignment = Align(4);
8665  }
8666 
8667  MachineMemOperand *MMO =
8669  RLI.Alignment, RLI.AAInfo, RLI.Ranges);
8670  SDValue Ops[] = { RLI.Chain, RLI.Ptr };
8671  Ld = DAG.getMemIntrinsicNode(IsSigned ? PPCISD::LFIWAX : PPCISD::LFIWZX, dl,
8672  DAG.getVTList(MVT::f64, MVT::Other), Ops,
8673  MVT::i32, MMO);
8674  Chain = Ld.getValue(1);
8675  if (ReusingLoad)
8676  spliceIntoChain(RLI.ResChain, Ld.getValue(1), DAG);
8677  } else {
8678  assert(Subtarget.isPPC64() &&
8679  "i32->FP without LFIWAX supported only on PPC64");
8680 
8681  int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
8682  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
8683 
8684  SDValue Ext64 = DAG.getNode(ISD::SIGN_EXTEND, dl, MVT::i64, Src);
8685 
8686  // STD the extended value into the stack slot.
8687  SDValue Store = DAG.getStore(
8688  Chain, dl, Ext64, FIdx,
8690  Chain = Store;
8691 
8692  // Load the value as a double.
8693  Ld = DAG.getLoad(
8694  MVT::f64, dl, Chain, FIdx,
8696  Chain = Ld.getValue(1);
8697  }
8698 
8699  // FCFID it and return it.
8700  SDValue FP = convertIntToFP(Op, Ld, DAG, Subtarget, Chain);
8701  if (IsStrict)
8702  Chain = FP.getValue(1);
8703  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
8704  if (IsStrict)
8705  FP = DAG.getNode(ISD::STRICT_FP_ROUND, dl,
8707  {Chain, FP, DAG.getIntPtrConstant(0, dl)}, Flags);
8708  else
8709  FP = DAG.getNode(ISD::FP_ROUND, dl, MVT::f32, FP,
8710  DAG.getIntPtrConstant(0, dl));
8711  }
8712  return FP;
8713 }
8714 
8715 SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op,
8716  SelectionDAG &DAG) const {
8717  SDLoc dl(Op);
8718  /*
8719  The rounding mode is in bits 30:31 of FPSR, and has the following
8720  settings:
8721  00 Round to nearest
8722  01 Round to 0
8723  10 Round to +inf
8724  11 Round to -inf
8725 
8726  FLT_ROUNDS, on the other hand, expects the following:
8727  -1 Undefined
8728  0 Round to 0
8729  1 Round to nearest
8730  2 Round to +inf
8731  3 Round to -inf
8732 
8733  To perform the conversion, we do:
8734  ((FPSCR & 0x3) ^ ((~FPSCR & 0x3) >> 1))
8735  */
8736 
8737  MachineFunction &MF = DAG.getMachineFunction();
8738  EVT VT = Op.getValueType();
8739  EVT PtrVT = getPointerTy(MF.getDataLayout());
8740 
8741  // Save FP Control Word to register
8742  SDValue Chain = Op.getOperand(0);
8743  SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain);
8744  Chain = MFFS.getValue(1);
8745 
8746  SDValue CWD;
8747  if (isTypeLegal(MVT::i64)) {
8748  CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32,
8749  DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS));
8750  } else {
8751  // Save FP register to stack slot
8752  int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false);
8753  SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
8754  Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo());
8755 
8756  // Load FP Control Word from low 32 bits of stack slot.
8758  "Stack slot adjustment is valid only on big endian subtargets!");
8759  SDValue Four = DAG.getConstant(4, dl, PtrVT);
8760  SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four);
8761  CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo());
8762  Chain = CWD.getValue(1);
8763  }
8764 
8765  // Transform as necessary
8766  SDValue CWD1 =
8767  DAG.getNode(ISD::AND, dl, MVT::i32,
8768  CWD, DAG.getConstant(3, dl, MVT::i32));
8769  SDValue CWD2 =
8770  DAG.getNode(ISD::SRL, dl, MVT::i32,
8771  DAG.getNode(ISD::AND, dl, MVT::i32,
8772  DAG.getNode(ISD::XOR, dl, MVT::i32,
8773  CWD, DAG.getConstant(3, dl, MVT::i32)),
8774  DAG.getConstant(3, dl, MVT::i32)),
8775  DAG.getConstant(1, dl, MVT::i32));
8776 
8777  SDValue RetVal =
8778  DAG.getNode(ISD::XOR, dl, MVT::i32, CWD1, CWD2);
8779 
8780  RetVal =
8782  dl, VT, RetVal);
8783 
8784  return DAG.getMergeValues({RetVal, Chain}, dl);
8785 }
8786 
8787 SDValue PPCTargetLowering::LowerSHL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8788  EVT VT = Op.getValueType();
8789  unsigned BitWidth = VT.getSizeInBits();
8790  SDLoc dl(Op);
8791  assert(Op.getNumOperands() == 3 &&
8792  VT == Op.getOperand(1).getValueType() &&
8793  "Unexpected SHL!");
8794 
8795  // Expand into a bunch of logical ops. Note that these ops
8796  // depend on the PPC behavior for oversized shift amounts.
8797  SDValue Lo = Op.getOperand(0);
8798  SDValue Hi = Op.getOperand(1);
8799  SDValue Amt = Op.getOperand(2);
8800  EVT AmtVT = Amt.getValueType();
8801 
8802  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8803  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8804  SDValue Tmp2 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Amt);
8805  SDValue Tmp3 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Tmp1);
8806  SDValue Tmp4 = DAG.getNode(ISD::OR , dl, VT, Tmp2, Tmp3);
8807  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8808  DAG.getConstant(-BitWidth, dl, AmtVT));
8809  SDValue Tmp6 = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Tmp5);
8810  SDValue OutHi = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8811  SDValue OutLo = DAG.getNode(PPCISD::SHL, dl, VT, Lo, Amt);
8812  SDValue OutOps[] = { OutLo, OutHi };
8813  return DAG.getMergeValues(OutOps, dl);
8814 }
8815 
8816 SDValue PPCTargetLowering::LowerSRL_PARTS(SDValue Op, SelectionDAG &DAG) const {
8817  EVT VT = Op.getValueType();
8818  SDLoc dl(Op);
8819  unsigned BitWidth = VT.getSizeInBits();
8820  assert(Op.getNumOperands() == 3 &&
8821  VT == Op.getOperand(1).getValueType() &&
8822  "Unexpected SRL!");
8823 
8824  // Expand into a bunch of logical ops. Note that these ops
8825  // depend on the PPC behavior for oversized shift amounts.
8826  SDValue Lo = Op.getOperand(0);
8827  SDValue Hi = Op.getOperand(1);
8828  SDValue Amt = Op.getOperand(2);
8829  EVT AmtVT = Amt.getValueType();
8830 
8831  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8832  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8833  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8834  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8835  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8836  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8837  DAG.getConstant(-BitWidth, dl, AmtVT));
8838  SDValue Tmp6 = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Tmp5);
8839  SDValue OutLo = DAG.getNode(ISD::OR, dl, VT, Tmp4, Tmp6);
8840  SDValue OutHi = DAG.getNode(PPCISD::SRL, dl, VT, Hi, Amt);
8841  SDValue OutOps[] = { OutLo, OutHi };
8842  return DAG.getMergeValues(OutOps, dl);
8843 }
8844 
8845 SDValue PPCTargetLowering::LowerSRA_PARTS(SDValue Op, SelectionDAG &DAG) const {
8846  SDLoc dl(Op);
8847  EVT VT = Op.getValueType();
8848  unsigned BitWidth = VT.getSizeInBits();
8849  assert(Op.getNumOperands() == 3 &&
8850  VT == Op.getOperand(1).getValueType() &&
8851  "Unexpected SRA!");
8852 
8853  // Expand into a bunch of logical ops, followed by a select_cc.
8854  SDValue Lo = Op.getOperand(0);
8855  SDValue Hi = Op.getOperand(1);
8856  SDValue Amt = Op.getOperand(2);
8857  EVT AmtVT = Amt.getValueType();
8858 
8859  SDValue Tmp1 = DAG.getNode(ISD::SUB, dl, AmtVT,
8860  DAG.getConstant(BitWidth, dl, AmtVT), Amt);
8861  SDValue Tmp2 = DAG.getNode(PPCISD::SRL, dl, VT, Lo, Amt);
8862  SDValue Tmp3 = DAG.getNode(PPCISD::SHL, dl, VT, Hi, Tmp1);
8863  SDValue Tmp4 = DAG.getNode(ISD::OR, dl, VT, Tmp2, Tmp3);
8864  SDValue Tmp5 = DAG.getNode(ISD::ADD, dl, AmtVT, Amt,
8865  DAG.getConstant(-BitWidth, dl, AmtVT));
8866  SDValue Tmp6 = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Tmp5);
8867  SDValue OutHi = DAG.getNode(PPCISD::SRA, dl, VT, Hi, Amt);
8868  SDValue OutLo = DAG.getSelectCC(dl, Tmp5, DAG.getConstant(0, dl, AmtVT),
8869  Tmp4, Tmp6, ISD::SETLE);
8870  SDValue OutOps[] = { OutLo, OutHi };
8871  return DAG.getMergeValues(OutOps, dl);
8872 }
8873 
8874 SDValue PPCTargetLowering::LowerFunnelShift(SDValue Op,
8875  SelectionDAG &DAG) const {
8876  SDLoc dl(Op);
8877  EVT VT = Op.getValueType();
8878  unsigned BitWidth = VT.getSizeInBits();
8879 
8880  bool IsFSHL = Op.getOpcode() == ISD::FSHL;
8881  SDValue X = Op.getOperand(0);
8882  SDValue Y = Op.getOperand(1);
8883  SDValue Z = Op.getOperand(2);
8884  EVT AmtVT = Z.getValueType();
8885 
8886  // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
8887  // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
8888  // This is simpler than TargetLowering::expandFunnelShift because we can rely
8889  // on PowerPC shift by BW being well defined.
8890  Z = DAG.getNode(ISD::AND, dl, AmtVT, Z,
8891  DAG.getConstant(BitWidth - 1, dl, AmtVT));
8892  SDValue SubZ =
8893  DAG.getNode(ISD::SUB, dl, AmtVT, DAG.getConstant(BitWidth, dl, AmtVT), Z);
8894  X = DAG.getNode(PPCISD::SHL, dl, VT, X, IsFSHL ? Z : SubZ);
8895  Y = DAG.getNode(PPCISD::SRL, dl, VT, Y, IsFSHL ? SubZ : Z);
8896  return DAG.getNode(ISD::OR, dl, VT, X, Y);
8897 }
8898 
8899 //===----------------------------------------------------------------------===//
8900 // Vector related lowering.
8901 //
8902 
8903 /// getCanonicalConstSplat - Build a canonical splat immediate of Val with an
8904 /// element size of SplatSize. Cast the result to VT.
8905 static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT,
8906  SelectionDAG &DAG, const SDLoc &dl) {
8907  static const MVT VTys[] = { // canonical VT to use for each size.
8909  };
8910 
8911  EVT ReqVT = VT != MVT::Other ? VT : VTys[SplatSize-1];
8912 
8913  // For a splat with all ones, turn it to vspltisb 0xFF to canonicalize.
8914  if (Val == ((1LLU << (SplatSize * 8)) - 1)) {
8915  SplatSize = 1;
8916  Val = 0xFF;
8917  }
8918 
8919  EVT CanonicalVT = VTys[SplatSize-1];
8920 
8921  // Build a canonical splat for this value.
8922  return DAG.getBitcast(ReqVT, DAG.getConstant(Val, dl, CanonicalVT));
8923 }
8924 
8925 /// BuildIntrinsicOp - Return a unary operator intrinsic node with the
8926 /// specified intrinsic ID.
8927 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG,
8928  const SDLoc &dl, EVT DestVT = MVT::Other) {
8929  if (DestVT == MVT::Other) DestVT = Op.getValueType();
8930  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8931  DAG.getConstant(IID, dl, MVT::i32), Op);
8932 }
8933 
8934 /// BuildIntrinsicOp - Return a binary operator intrinsic node with the
8935 /// specified intrinsic ID.
8937  SelectionDAG &DAG, const SDLoc &dl,
8938  EVT DestVT = MVT::Other) {
8939  if (DestVT == MVT::Other) DestVT = LHS.getValueType();
8940  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8941  DAG.getConstant(IID, dl, MVT::i32), LHS, RHS);
8942 }
8943 
8944 /// BuildIntrinsicOp - Return a ternary operator intrinsic node with the
8945 /// specified intrinsic ID.
8946 static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op0, SDValue Op1,
8947  SDValue Op2, SelectionDAG &DAG, const SDLoc &dl,
8948  EVT DestVT = MVT::Other) {
8949  if (DestVT == MVT::Other) DestVT = Op0.getValueType();
8950  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT,
8951  DAG.getConstant(IID, dl, MVT::i32), Op0, Op1, Op2);
8952 }
8953 
8954 /// BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified
8955 /// amount. The result has the specified value type.
8956 static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
8957  SelectionDAG &DAG, const SDLoc &dl) {
8958  // Force LHS/RHS to be the right type.
8959  LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, LHS);
8960  RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, RHS);
8961 
8962  int Ops[16];
8963  for (unsigned i = 0; i != 16; ++i)
8964  Ops[i] = i + Amt;
8965  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, LHS, RHS, Ops);
8966  return DAG.getNode(ISD::BITCAST, dl, VT, T);
8967 }
8968 
8969 /// Do we have an efficient pattern in a .td file for this node?
8970 ///
8971 /// \param V - pointer to the BuildVectorSDNode being matched
8972 /// \param HasDirectMove - does this subtarget have VSR <-> GPR direct moves?
8973 ///
8974 /// There are some patterns where it is beneficial to keep a BUILD_VECTOR
8975 /// node as a BUILD_VECTOR node rather than expanding it. The patterns where
8976 /// the opposite is true (expansion is beneficial) are:
8977 /// - The node builds a vector out of integers that are not 32 or 64-bits
8978 /// - The node builds a vector out of constants
8979 /// - The node is a "load-and-splat"
8980 /// In all other cases, we will choose to keep the BUILD_VECTOR.
8982  bool HasDirectMove,
8983  bool HasP8Vector) {
8984  EVT VecVT = V->getValueType(0);
8985  bool RightType = VecVT == MVT::v2f64 ||
8986  (HasP8Vector && VecVT == MVT::v4f32) ||
8987  (HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
8988  if (!RightType)
8989  return false;
8990 
8991  bool IsSplat = true;
8992  bool IsLoad = false;
8993  SDValue Op0 = V->getOperand(0);
8994 
8995  // This function is called in a block that confirms the node is not a constant
8996  // splat. So a constant BUILD_VECTOR here means the vector is built out of
8997  // different constants.
8998  if (V->isConstant())
8999  return false;
9000  for (int i = 0, e = V->getNumOperands(); i < e; ++i) {
9001  if (V->getOperand(i).isUndef())
9002  return false;
9003  // We want to expand nodes that represent load-and-splat even if the
9004  // loaded value is a floating point truncation or conversion to int.
9005  if (V->getOperand(i).getOpcode() == ISD::LOAD ||
9006  (V->getOperand(i).getOpcode() == ISD::FP_ROUND &&
9007  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9008  (V->getOperand(i).getOpcode() == ISD::FP_TO_SINT &&
9009  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD) ||
9010  (V->getOperand(i).getOpcode() == ISD::FP_TO_UINT &&
9011  V->getOperand(i).getOperand(0).getOpcode() == ISD::LOAD))
9012  IsLoad = true;
9013  // If the operands are different or the input is not a load and has more
9014  // uses than just this BV node, then it isn't a splat.
9015  if (V->getOperand(i) != Op0 ||
9016  (!IsLoad && !V->isOnlyUserOf(V->getOperand(i).getNode())))
9017  IsSplat = false;
9018  }
9019  return !(IsSplat && IsLoad);
9020 }
9021 
9022 // Lower BITCAST(f128, (build_pair i64, i64)) to BUILD_FP128.
9023 SDValue PPCTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
9024 
9025  SDLoc dl(Op);
9026  SDValue Op0 = Op->getOperand(0);
9027 
9028  if ((Op.getValueType() != MVT::f128) ||
9029  (Op0.getOpcode() != ISD::BUILD_PAIR) ||
9030  (Op0.getOperand(0).getValueType() != MVT::i64) ||
9031  (Op0.getOperand(1).getValueType() != MVT::i64))
9032  return SDValue();
9033 
9034  return DAG.getNode(PPCISD::BUILD_FP128, dl, MVT::f128, Op0.getOperand(0),
9035  Op0.getOperand(1));
9036 }
9037 
9038 static const SDValue *getNormalLoadInput(const SDValue &Op, bool &IsPermuted) {
9039  const SDValue *InputLoad = &Op;
9040  if (InputLoad->getOpcode() == ISD::BITCAST)
9041  InputLoad = &InputLoad->getOperand(0);
9042  if (InputLoad->getOpcode() == ISD::SCALAR_TO_VECTOR ||
9043  InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED) {
9044  IsPermuted = InputLoad->getOpcode() == PPCISD::SCALAR_TO_VECTOR_PERMUTED;
9045  InputLoad = &InputLoad->getOperand(0);
9046  }
9047  if (InputLoad->getOpcode() != ISD::LOAD)
9048  return nullptr;
9049  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9050  return ISD::isNormalLoad(LD) ? InputLoad : nullptr;
9051 }
9052 
9053 // Convert the argument APFloat to a single precision APFloat if there is no
9054 // loss in information during the conversion to single precision APFloat and the
9055 // resulting number is not a denormal number. Return true if successful.
9057  APFloat APFloatToConvert = ArgAPFloat;
9058  bool LosesInfo = true;
9060  &LosesInfo);
9061  bool Success = (!LosesInfo && !APFloatToConvert.isDenormal());
9062  if (Success)
9063  ArgAPFloat = APFloatToConvert;
9064  return Success;
9065 }
9066 
9067 // Bitcast the argument APInt to a double and convert it to a single precision
9068 // APFloat, bitcast the APFloat to an APInt and assign it to the original
9069 // argument if there is no loss in information during the conversion from
9070 // double to single precision APFloat and the resulting number is not a denormal
9071 // number. Return true if successful.
9073  double DpValue = ArgAPInt.bitsToDouble();
9074  APFloat APFloatDp(DpValue);
9075  bool Success = convertToNonDenormSingle(APFloatDp);
9076  if (Success)
9077  ArgAPInt = APFloatDp.bitcastToAPInt();
9078  return Success;
9079 }
9080 
9081 // Nondestructive check for convertTonNonDenormSingle.
9083  // Only convert if it loses info, since XXSPLTIDP should
9084  // handle the other case.
9085  APFloat APFloatToConvert = ArgAPFloat;
9086  bool LosesInfo = true;
9088  &LosesInfo);
9089 
9090  return (!LosesInfo && !APFloatToConvert.isDenormal());
9091 }
9092 
9093 static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op,
9094  unsigned &Opcode) {
9095  LoadSDNode *InputNode = dyn_cast<LoadSDNode>(Op.getOperand(0));
9096  if (!InputNode || !Subtarget.hasVSX() || !ISD::isUNINDEXEDLoad(InputNode))
9097  return false;
9098 
9099  EVT Ty = Op->getValueType(0);
9100  // For v2f64, v4f32 and v4i32 types, we require the load to be non-extending
9101  // as we cannot handle extending loads for these types.
9102  if ((Ty == MVT::v2f64 || Ty == MVT::v4f32 || Ty == MVT::v4i32) &&
9103  ISD::isNON_EXTLoad(InputNode))
9104  return true;
9105 
9106  EVT MemVT = InputNode->getMemoryVT();
9107  // For v8i16 and v16i8 types, extending loads can be handled as long as the
9108  // memory VT is the same vector element VT type.
9109  // The loads feeding into the v8i16 and v16i8 types will be extending because
9110  // scalar i8/i16 are not legal types.
9111  if ((Ty == MVT::v8i16 || Ty == MVT::v16i8) && ISD::isEXTLoad(InputNode) &&
9112  (MemVT == Ty.getVectorElementType()))
9113  return true;
9114 
9115  if (Ty == MVT::v2i64) {
9116  // Check the extend type, when the input type is i32, and the output vector
9117  // type is v2i64.
9118  if (MemVT == MVT::i32) {
9119  if (ISD::isZEXTLoad(InputNode))
9120  Opcode = PPCISD::ZEXT_LD_SPLAT;
9121  if (ISD::isSEXTLoad(InputNode))
9122  Opcode = PPCISD::SEXT_LD_SPLAT;
9123  }
9124  return true;
9125  }
9126  return false;
9127 }
9128 
9129 // If this is a case we can't handle, return null and let the default
9130 // expansion code take care of it. If we CAN select this case, and if it
9131 // selects to a single instruction, return Op. Otherwise, if we can codegen
9132 // this case more efficiently than a constant pool load, lower it to the
9133 // sequence of ops that should be used.
9134 SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
9135  SelectionDAG &DAG) const {
9136  SDLoc dl(Op);
9137  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(Op.getNode());
9138  assert(BVN && "Expected a BuildVectorSDNode in LowerBUILD_VECTOR");
9139 
9140  // Check if this is a splat of a constant value.
9141  APInt APSplatBits, APSplatUndef;
9142  unsigned SplatBitSize;
9143  bool HasAnyUndefs;
9144  bool BVNIsConstantSplat =
9145  BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
9146  HasAnyUndefs, 0, !Subtarget.isLittleEndian());
9147 
9148  // If it is a splat of a double, check if we can shrink it to a 32 bit
9149  // non-denormal float which when converted back to double gives us the same
9150  // double. This is to exploit the XXSPLTIDP instruction.
9151  // If we lose precision, we use XXSPLTI32DX.
9152  if (BVNIsConstantSplat && (SplatBitSize == 64) &&
9153  Subtarget.hasPrefixInstrs()) {
9154  // Check the type first to short-circuit so we don't modify APSplatBits if
9155  // this block isn't executed.
9156  if ((Op->getValueType(0) == MVT::v2f64) &&
9157  convertToNonDenormSingle(APSplatBits)) {
9158  SDValue SplatNode = DAG.getNode(
9160  DAG.getTargetConstant(APSplatBits.getZExtValue(), dl, MVT::i32));
9161  return DAG.getBitcast(Op.getValueType(), SplatNode);
9162  } else {
9163  // We may lose precision, so we have to use XXSPLTI32DX.
9164 
9165  uint32_t Hi =
9166  (uint32_t)((APSplatBits.getZExtValue() & 0xFFFFFFFF00000000LL) >> 32);
9167  uint32_t Lo =
9168  (uint32_t)(APSplatBits.getZExtValue() & 0xFFFFFFFF);
9169  SDValue SplatNode = DAG.getUNDEF(MVT::v2i64);
9170 
9171  if (!Hi || !Lo)
9172  // If either load is 0, then we should generate XXLXOR to set to 0.
9173  SplatNode = DAG.getTargetConstant(0, dl, MVT::v2i64);
9174 
9175  if (Hi)
9176  SplatNode = DAG.getNode(
9177  PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9178  DAG.getTargetConstant(0, dl, MVT::i32),
9179  DAG.getTargetConstant(Hi, dl, MVT::i32));
9180 
9181  if (Lo)
9182  SplatNode =
9183  DAG.getNode(PPCISD::XXSPLTI32DX, dl, MVT::v2i64, SplatNode,
9184  DAG.getTargetConstant(1, dl, MVT::i32),
9185  DAG.getTargetConstant(Lo, dl, MVT::i32));
9186 
9187  return DAG.getBitcast(Op.getValueType(), SplatNode);
9188  }
9189  }
9190 
9191  if (!BVNIsConstantSplat || SplatBitSize > 32) {
9192  unsigned NewOpcode = PPCISD::LD_SPLAT;
9193 
9194  // Handle load-and-splat patterns as we have instructions that will do this
9195  // in one go.
9196  if (DAG.isSplatValue(Op, true) &&
9197  isValidSplatLoad(Subtarget, Op, NewOpcode)) {
9198  const SDValue *InputLoad = &Op.getOperand(0);
9199  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9200 
9201  // If the input load is an extending load, it will be an i32 -> i64
9202  // extending load and isValidSplatLoad() will update NewOpcode.
9203  unsigned MemorySize = LD->getMemoryVT().getScalarSizeInBits();
9204  unsigned ElementSize =
9205  MemorySize * ((NewOpcode == PPCISD::LD_SPLAT) ? 1 : 2);
9206 
9207  assert(((ElementSize == 2 * MemorySize)
9208  ? (NewOpcode == PPCISD::ZEXT_LD_SPLAT ||
9209  NewOpcode == PPCISD::SEXT_LD_SPLAT)
9210  : (NewOpcode == PPCISD::LD_SPLAT)) &&
9211  "Unmatched element size and opcode!\n");
9212 
9213  // Checking for a single use of this load, we have to check for vector
9214  // width (128 bits) / ElementSize uses (since each operand of the
9215  // BUILD_VECTOR is a separate use of the value.
9216  unsigned NumUsesOfInputLD = 128 / ElementSize;
9217  for (SDValue BVInOp : Op->ops())
9218  if (BVInOp.isUndef())
9219  NumUsesOfInputLD--;
9220 
9221  // Exclude somes case where LD_SPLAT is worse than scalar_to_vector:
9222  // Below cases should also happen for "lfiwzx/lfiwax + LE target + index
9223  // 1" and "lxvrhx + BE target + index 7" and "lxvrbx + BE target + index
9224  // 15", but funciton IsValidSplatLoad() now will only return true when
9225  // the data at index 0 is not nullptr. So we will not get into trouble for
9226  // these cases.
9227  //
9228  // case 1 - lfiwzx/lfiwax
9229  // 1.1: load result is i32 and is sign/zero extend to i64;
9230  // 1.2: build a v2i64 vector type with above loaded value;
9231  // 1.3: the vector has only one value at index 0, others are all undef;
9232  // 1.4: on BE target, so that lfiwzx/lfiwax does not need any permute.
9233  if (NumUsesOfInputLD == 1 &&
9234  (Op->getValueType(0) == MVT::v2i64 && NewOpcode != PPCISD::LD_SPLAT &&
9235  !Subtarget.isLittleEndian() && Subtarget.hasVSX() &&
9236  Subtarget.hasLFIWAX()))
9237  return SDValue();
9238 
9239  // case 2 - lxvr[hb]x
9240  // 2.1: load result is at most i16;
9241  // 2.2: build a vector with above loaded value;
9242  // 2.3: the vector has only one value at index 0, others are all undef;
9243  // 2.4: on LE target, so that lxvr[hb]x does not need any permute.
9244  if (NumUsesOfInputLD == 1 && Subtarget.isLittleEndian() &&
9245  Subtarget.isISA3_1() && ElementSize <= 16)
9246  return SDValue();
9247 
9248  assert(NumUsesOfInputLD > 0 && "No uses of input LD of a build_vector?");
9249  if (InputLoad->getNode()->hasNUsesOfValue(NumUsesOfInputLD, 0) &&
9250  Subtarget.hasVSX()) {
9251  SDValue Ops[] = {
9252  LD->getChain(), // Chain
9253  LD->getBasePtr(), // Ptr
9254  DAG.getValueType(Op.getValueType()) // VT
9255  };
9256  SDValue LdSplt = DAG.getMemIntrinsicNode(
9257  NewOpcode, dl, DAG.getVTList(Op.getValueType(), MVT::Other), Ops,
9258  LD->getMemoryVT(), LD->getMemOperand());
9259  // Replace all uses of the output chain of the original load with the
9260  // output chain of the new load.
9261  DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1),
9262  LdSplt.getValue(1));
9263  return LdSplt;
9264  }
9265  }
9266 
9267  // In 64BIT mode BUILD_VECTOR nodes that are not constant splats of up to
9268  // 32-bits can be lowered to VSX instructions under certain conditions.
9269  // Without VSX, there is no pattern more efficient than expanding the node.
9270  if (Subtarget.hasVSX() && Subtarget.isPPC64() &&
9272  Subtarget.hasP8Vector()))
9273  return Op;
9274  return SDValue();
9275  }
9276 
9277  uint64_t SplatBits = APSplatBits.getZExtValue();
9278  uint64_t SplatUndef = APSplatUndef.getZExtValue();
9279  unsigned SplatSize = SplatBitSize / 8;
9280 
9281  // First, handle single instruction cases.
9282 
9283  // All zeros?
9284  if (SplatBits == 0) {
9285  // Canonicalize all zero vectors to be v4i32.
9286  if (Op.getValueType() != MVT::v4i32 || HasAnyUndefs) {
9287  SDValue Z = DAG.getConstant(0, dl, MVT::v4i32);
9288  Op = DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Z);
9289  }
9290  return Op;
9291  }
9292 
9293  // We have XXSPLTIW for constant splats four bytes wide.
9294  // Given vector length is a multiple of 4, 2-byte splats can be replaced
9295  // with 4-byte splats. We replicate the SplatBits in case of 2-byte splat to
9296  // make a 4-byte splat element. For example: 2-byte splat of 0xABAB can be
9297  // turned into a 4-byte splat of 0xABABABAB.
9298  if (Subtarget.hasPrefixInstrs() && SplatSize == 2)
9299  return getCanonicalConstSplat(SplatBits | (SplatBits << 16), SplatSize * 2,
9300  Op.getValueType(), DAG, dl);
9301 
9302  if (Subtarget.hasPrefixInstrs() && SplatSize == 4)
9303  return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9304  dl);
9305 
9306  // We have XXSPLTIB for constant splats one byte wide.
9307  if (Subtarget.hasP9Vector() && SplatSize == 1)
9308  return getCanonicalConstSplat(SplatBits, SplatSize, Op.getValueType(), DAG,
9309  dl);
9310 
9311  // If the sign extended value is in the range [-16,15], use VSPLTI[bhw].
9312  int32_t SextVal= (int32_t(SplatBits << (32-SplatBitSize)) >>
9313  (32-SplatBitSize));
9314  if (SextVal >= -16 && SextVal <= 15)
9315  return getCanonicalConstSplat(SextVal, SplatSize, Op.getValueType(), DAG,
9316  dl);
9317 
9318  // Two instruction sequences.
9319 
9320  // If this value is in the range [-32,30] and is even, use:
9321  // VSPLTI[bhw](val/2) + VSPLTI[bhw](val/2)
9322  // If this value is in the range [17,31] and is odd, use:
9323  // VSPLTI[bhw](val-16) - VSPLTI[bhw](-16)
9324  // If this value is in the range [-31,-17] and is odd, use:
9325  // VSPLTI[bhw](val+16) + VSPLTI[bhw](-16)
9326  // Note the last two are three-instruction sequences.
9327  if (SextVal >= -32 && SextVal <= 31) {
9328  // To avoid having these optimizations undone by constant folding,
9329  // we convert to a pseudo that will be expanded later into one of
9330  // the above forms.
9331  SDValue Elt = DAG.getConstant(SextVal, dl, MVT::i32);
9332  EVT VT = (SplatSize == 1 ? MVT::v16i8 :
9333  (SplatSize == 2 ? MVT::v8i16 : MVT::v4i32));
9334  SDValue EltSize = DAG.getConstant(SplatSize, dl, MVT::i32);
9335  SDValue RetVal = DAG.getNode(PPCISD::VADD_SPLAT, dl, VT, Elt, EltSize);
9336  if (VT == Op.getValueType())
9337  return RetVal;
9338  else
9339  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), RetVal);
9340  }
9341 
9342  // If this is 0x8000_0000 x 4, turn into vspltisw + vslw. If it is
9343  // 0x7FFF_FFFF x 4, turn it into not(0x8000_0000). This is important
9344  // for fneg/fabs.
9345  if (SplatSize == 4 && SplatBits == (0x7FFFFFFF&~SplatUndef)) {
9346  // Make -1 and vspltisw -1:
9347  SDValue OnesV = getCanonicalConstSplat(-1, 4, MVT::v4i32, DAG, dl);
9348 
9349  // Make the VSLW intrinsic, computing 0x8000_0000.
9350  SDValue Res = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, OnesV,
9351  OnesV, DAG, dl);
9352 
9353  // xor by OnesV to invert it.
9354  Res = DAG.getNode(ISD::XOR, dl, MVT::v4i32, Res, OnesV);
9355  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9356  }
9357 
9358  // Check to see if this is a wide variety of vsplti*, binop self cases.
9359  static const signed char SplatCsts[] = {
9360  -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7,
9361  -8, 8, -9, 9, -10, 10, -11, 11, -12, 12, -13, 13, 14, -14, 15, -15, -16
9362  };
9363 
9364  for (unsigned idx = 0; idx < array_lengthof(SplatCsts); ++idx) {
9365  // Indirect through the SplatCsts array so that we favor 'vsplti -1' for
9366  // cases which are ambiguous (e.g. formation of 0x8000_0000). 'vsplti -1'
9367  int i = SplatCsts[idx];
9368 
9369  // Figure out what shift amount will be used by altivec if shifted by i in
9370  // this splat size.
9371  unsigned TypeShiftAmt = i & (SplatBitSize-1);
9372 
9373  // vsplti + shl self.
9374  if (SextVal == (int)((unsigned)i << TypeShiftAmt)) {
9375  SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9376  static const unsigned IIDs[] = { // Intrinsic to use for each size.
9377  Intrinsic::ppc_altivec_vslb, Intrinsic::ppc_altivec_vslh, 0,
9378  Intrinsic::ppc_altivec_vslw
9379  };
9380  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9381  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9382  }
9383 
9384  // vsplti + srl self.
9385  if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) {
9386  SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9387  static const unsigned IIDs[] = { // Intrinsic to use for each size.
9388  Intrinsic::ppc_altivec_vsrb, Intrinsic::ppc_altivec_vsrh, 0,
9389  Intrinsic::ppc_altivec_vsrw
9390  };
9391  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9392  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9393  }
9394 
9395  // vsplti + rol self.
9396  if (SextVal == (int)(((unsigned)i << TypeShiftAmt) |
9397  ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) {
9398  SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl);
9399  static const unsigned IIDs[] = { // Intrinsic to use for each size.
9400  Intrinsic::ppc_altivec_vrlb, Intrinsic::ppc_altivec_vrlh, 0,
9401  Intrinsic::ppc_altivec_vrlw
9402  };
9403  Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl);
9404  return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res);
9405  }
9406 
9407  // t = vsplti c, result = vsldoi t, t, 1
9408  if (SextVal == (int)(((unsigned)i << 8) | (i < 0 ? 0xFF : 0))) {
9409  SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9410  unsigned Amt = Subtarget.isLittleEndian() ? 15 : 1;
9411  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9412  }
9413  // t = vsplti c, result = vsldoi t, t, 2
9414  if (SextVal == (int)(((unsigned)i << 16) | (i < 0 ? 0xFFFF : 0))) {
9415  SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9416  unsigned Amt = Subtarget.isLittleEndian() ? 14 : 2;
9417  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9418  }
9419  // t = vsplti c, result = vsldoi t, t, 3
9420  if (SextVal == (int)(((unsigned)i << 24) | (i < 0 ? 0xFFFFFF : 0))) {
9421  SDValue T = getCanonicalConstSplat(i, SplatSize, MVT::v16i8, DAG, dl);
9422  unsigned Amt = Subtarget.isLittleEndian() ? 13 : 3;
9423  return BuildVSLDOI(T, T, Amt, Op.getValueType(), DAG, dl);
9424  }
9425  }
9426 
9427  return SDValue();
9428 }
9429 
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
/// the specified operations to build the shuffle.
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS,
                                      SDValue RHS, SelectionDAG &DAG,
                                      const SDLoc &dl) {
  // Decode the table entry: 4-bit opcode in bits [29:26], then two 13-bit
  // operand identifiers (recursive sub-shuffle entries).
  unsigned OpNum = (PFEntry >> 26) & 0x0F;
  unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1);
  unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1);

  enum {
    OP_COPY = 0,  // Copy, used for things like <u,u,u,3> to say it is <0,1,2,3>
    OP_VMRGHW,
    OP_VMRGLW,
    OP_VSPLTISW0,
    OP_VSPLTISW1,
    OP_VSPLTISW2,
    OP_VSPLTISW3,
    OP_VSLDOI4,
    OP_VSLDOI8,
    OP_VSLDOI12
  };

  if (OpNum == OP_COPY) {
    // The two base-9 encoded identities <0,1,2,3> and <4,5,6,7> select LHS
    // and RHS directly, with no instruction emitted.
    if (LHSID == (1*9+2)*9+3) return LHS;
    assert(LHSID == ((4*9+5)*9+6)*9+7 && "Illegal OP_COPY!");
    return RHS;
  }

  // Recursively materialize the two sub-shuffles this entry is built from.
  SDValue OpLHS, OpRHS;
  OpLHS = GeneratePerfectShuffle(PerfectShuffleTable[LHSID], LHS, RHS, DAG, dl);
  OpRHS = GeneratePerfectShuffle(PerfectShuffleTable[RHSID], LHS, RHS, DAG, dl);

  // Byte-level shuffle mask implementing the selected AltiVec operation.
  int ShufIdxs[16];
  switch (OpNum) {
  default: llvm_unreachable("Unknown i32 permute!");
  case OP_VMRGHW:
    ShufIdxs[ 0] =  0; ShufIdxs[ 1] =  1; ShufIdxs[ 2] =  2; ShufIdxs[ 3] =  3;
    ShufIdxs[ 4] = 16; ShufIdxs[ 5] = 17; ShufIdxs[ 6] = 18; ShufIdxs[ 7] = 19;
    ShufIdxs[ 8] =  4; ShufIdxs[ 9] =  5; ShufIdxs[10] =  6; ShufIdxs[11] =  7;
    ShufIdxs[12] = 20; ShufIdxs[13] = 21; ShufIdxs[14] = 22; ShufIdxs[15] = 23;
    break;
  case OP_VMRGLW:
    ShufIdxs[ 0] =  8; ShufIdxs[ 1] =  9; ShufIdxs[ 2] = 10; ShufIdxs[ 3] = 11;
    ShufIdxs[ 4] = 24; ShufIdxs[ 5] = 25; ShufIdxs[ 6] = 26; ShufIdxs[ 7] = 27;
    ShufIdxs[ 8] = 12; ShufIdxs[ 9] = 13; ShufIdxs[10] = 14; ShufIdxs[11] = 15;
    ShufIdxs[12] = 28; ShufIdxs[13] = 29; ShufIdxs[14] = 30; ShufIdxs[15] = 31;
    break;
  case OP_VSPLTISW0:
    // Splat word 0/1/2/3 of the LHS across all four words.
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+0;
    break;
  case OP_VSPLTISW1:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+4;
    break;
  case OP_VSPLTISW2:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+8;
    break;
  case OP_VSPLTISW3:
    for (unsigned i = 0; i != 16; ++i)
      ShufIdxs[i] = (i&3)+12;
    break;
  case OP_VSLDOI4:
    return BuildVSLDOI(OpLHS, OpRHS, 4, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI8:
    return BuildVSLDOI(OpLHS, OpRHS, 8, OpLHS.getValueType(), DAG, dl);
  case OP_VSLDOI12:
    return BuildVSLDOI(OpLHS, OpRHS, 12, OpLHS.getValueType(), DAG, dl);
  }
  // Emit the shuffle on v16i8 and cast back to the original value type.
  EVT VT = OpLHS.getValueType();
  OpLHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpLHS);
  OpRHS = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OpRHS);
  SDValue T = DAG.getVectorShuffle(MVT::v16i8, dl, OpLHS, OpRHS, ShufIdxs);
  return DAG.getNode(ISD::BITCAST, dl, VT, T);
}
9506 
/// lowerToVINSERTB - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTB instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTB(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned BytesInVector = 16;
  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the byte we want at element 7.
  unsigned LittleEndianShifts[] = {8, 7, 6, 5, 4, 3, 2, 1,
                                   0, 15, 14, 13, 12, 11, 10, 9};
  unsigned BigEndianShifts[] = {9, 10, 11, 12, 13, 14, 15, 0,
                                1, 2, 3, 4, 5, 6, 7, 8};

  ArrayRef<int> Mask = N->getMask();
  int OriginalOrder[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15};

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa.
  // Possible permutations inserting an element from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   0, X, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
  //   ...
  //   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, X
  // Inserting from V1 into V2 will be similar, except mask range will be
  // [16,31].

  bool FoundCandidate = false;
  // If both vector operands for the shuffle are the same vector, the mask
  // will contain only elements from the first one and the second one will be
  // undef.
  unsigned VINSERTBSrcElem = IsLE ? 8 : 7;
  // Go through the mask of bytes to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < BytesInVector; ++i) {
    unsigned CurrentElement = Mask[i];
    // If 2nd operand is undefined, we should only look for element 7 in the
    // Mask.
    if (V2.isUndef() && CurrentElement != VINSERTBSrcElem)
      continue;

    bool OtherElementsInOrder = true;
    // Examine the other elements in the Mask to see if they're in original
    // order.
    for (unsigned j = 0; j < BytesInVector; ++j) {
      if (j == i)
        continue;
      // If CurrentElement is from V1 [0,15], then we expect the rest of the
      // Mask to be from V2 [16,31] and vice versa. Unless the 2nd operand is
      // undefined, in which case we always assume we're picking from the 1st
      // operand.
      int MaskOffset =
          (!V2.isUndef() && CurrentElement < BytesInVector) ? BytesInVector : 0;
      if (Mask[j] != OriginalOrder[j] + MaskOffset) {
        OtherElementsInOrder = false;
        break;
      }
    }
    // If other elements are in original order, we record the number of shifts
    // we need to get the element we want into element 7. Also record which byte
    // in the vector we should insert into.
    if (OtherElementsInOrder) {
      // If 2nd operand is undefined, we assume no shifts and no swapping.
      if (V2.isUndef()) {
        ShiftElts = 0;
        Swap = false;
      } else {
        // Only need the last 4-bits for shifts because operands will be swapped
        // if CurrentElement is >= 2^4.
        ShiftElts = IsLE ? LittleEndianShifts[CurrentElement & 0xF]
                         : BigEndianShifts[CurrentElement & 0xF];
        Swap = CurrentElement < BytesInVector;
      }
      InsertAtByte = IsLE ? BytesInVector - (i + 1) : i;
      FoundCandidate = true;
      break;
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTB,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  if (ShiftElts) {
    // Rotate the source so the byte to insert lands in the VINSERTB source
    // position, then insert it at the computed byte offset.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(ShiftElts, dl, MVT::i32));
    return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, Shl,
                       DAG.getConstant(InsertAtByte, dl, MVT::i32));
  }
  return DAG.getNode(PPCISD::VECINSERT, dl, MVT::v16i8, V1, V2,
                     DAG.getConstant(InsertAtByte, dl, MVT::i32));
}
9607 
/// lowerToVINSERTH - Return the SDValue if this VECTOR_SHUFFLE can be handled
/// by the VINSERTH instruction introduced in ISA 3.0, else just return default
/// SDValue.
SDValue PPCTargetLowering::lowerToVINSERTH(ShuffleVectorSDNode *N,
                                           SelectionDAG &DAG) const {
  const unsigned NumHalfWords = 8;
  const unsigned BytesInVector = NumHalfWords * 2;
  // Check that the shuffle is on half-words.
  if (!isNByteElemShuffleMask(N, 2, 1))
    return SDValue();

  bool IsLE = Subtarget.isLittleEndian();
  SDLoc dl(N);
  SDValue V1 = N->getOperand(0);
  SDValue V2 = N->getOperand(1);
  unsigned ShiftElts = 0, InsertAtByte = 0;
  bool Swap = false;

  // Shifts required to get the half-word we want at element 3.
  unsigned LittleEndianShifts[] = {4, 3, 2, 1, 0, 7, 6, 5};
  unsigned BigEndianShifts[] = {5, 6, 7, 0, 1, 2, 3, 4};

  uint32_t Mask = 0;
  // Nibble-packed encodings of the identity mask for elements [0,7] and
  // [8,15] respectively, used for order comparison below.
  uint32_t OriginalOrderLow = 0x1234567;
  uint32_t OriginalOrderHigh = 0x89ABCDEF;
  // Now we look at mask elements 0,2,4,6,8,10,12,14. Pack the mask into a
  // 32-bit space, only need 4-bit nibbles per element.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    Mask |= ((uint32_t)(N->getMaskElt(i * 2) / 2) << MaskShift);
  }

  // For each mask element, find out if we're just inserting something
  // from V2 into V1 or vice versa. Possible permutations inserting an element
  // from V2 into V1:
  //   X, 1, 2, 3, 4, 5, 6, 7
  //   0, X, 2, 3, 4, 5, 6, 7
  //   0, 1, X, 3, 4, 5, 6, 7
  //   0, 1, 2, X, 4, 5, 6, 7
  //   0, 1, 2, 3, X, 5, 6, 7
  //   0, 1, 2, 3, 4, X, 6, 7
  //   0, 1, 2, 3, 4, 5, X, 7
  //   0, 1, 2, 3, 4, 5, 6, X
  // Inserting from V1 into V2 will be similar, except mask range will be [8,15].

  bool FoundCandidate = false;
  // Go through the mask of half-words to find an element that's being moved
  // from one vector to the other.
  for (unsigned i = 0; i < NumHalfWords; ++i) {
    unsigned MaskShift = (NumHalfWords - 1 - i) * 4;
    uint32_t MaskOneElt = (Mask >> MaskShift) & 0xF;
    uint32_t MaskOtherElts = ~(0xF << MaskShift);
    uint32_t TargetOrder = 0x0;

    // If both vector operands for the shuffle are the same vector, the mask
    // will contain only elements from the first one and the second one will be
    // undef.
    if (V2.isUndef()) {
      ShiftElts = 0;
      unsigned VINSERTHSrcElem = IsLE ? 4 : 3;
      TargetOrder = OriginalOrderLow;
      Swap = false;
      // Skip if not the correct element or mask of other elements don't equal
      // to our expected order.
      if (MaskOneElt == VINSERTHSrcElem &&
          (Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        FoundCandidate = true;
        break;
      }
    } else { // If both operands are defined.
      // Target order is [8,15] if the current mask is between [0,7].
      TargetOrder =
          (MaskOneElt < NumHalfWords) ? OriginalOrderHigh : OriginalOrderLow;
      // Skip if mask of other elements don't equal our expected order.
      if ((Mask & MaskOtherElts) == (TargetOrder & MaskOtherElts)) {
        // We only need the last 3 bits for the number of shifts.
        ShiftElts = IsLE ? LittleEndianShifts[MaskOneElt & 0x7]
                         : BigEndianShifts[MaskOneElt & 0x7];
        InsertAtByte = IsLE ? BytesInVector - (i + 1) * 2 : i * 2;
        Swap = MaskOneElt < NumHalfWords;
        FoundCandidate = true;
        break;
      }
    }
  }

  if (!FoundCandidate)
    return SDValue();

  // Candidate found, construct the proper SDAG sequence with VINSERTH,
  // optionally with VECSHL if shift is required.
  if (Swap)
    std::swap(V1, V2);
  if (V2.isUndef())
    V2 = V1;
  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
  if (ShiftElts) {
    // Double ShiftElts because we're left shifting on v16i8 type.
    SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v16i8, V2, V2,
                              DAG.getConstant(2 * ShiftElts, dl, MVT::i32));
    SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, Shl);
    SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                              DAG.getConstant(InsertAtByte, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
  }
  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V2);
  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v8i16, Conv1, Conv2,
                            DAG.getConstant(InsertAtByte, dl, MVT::i32));
  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
}
9719 
9720 /// lowerToXXSPLTI32DX - Return the SDValue if this VECTOR_SHUFFLE can be
9721 /// handled by the XXSPLTI32DX instruction introduced in ISA 3.1, otherwise
9722 /// return the default SDValue.
9723 SDValue PPCTargetLowering::lowerToXXSPLTI32DX(ShuffleVectorSDNode *SVN,
9724  SelectionDAG &DAG) const {
9725  // The LHS and RHS may be bitcasts to v16i8 as we canonicalize shuffles
9726  // to v16i8. Peek through the bitcasts to get the actual operands.
9729 
9730  auto ShuffleMask = SVN->getMask();
9731  SDValue VecShuffle(SVN, 0);
9732  SDLoc DL(SVN);
9733 
9734  // Check that we have a four byte shuffle.
9735  if (!isNByteElemShuffleMask(SVN, 4, 1))
9736  return SDValue();
9737 
9738  // Canonicalize the RHS being a BUILD_VECTOR when lowering to xxsplti32dx.
9739  if (RHS->getOpcode() != ISD::BUILD_VECTOR) {
9740  std::swap(LHS, RHS);
9742  ShuffleMask = cast<ShuffleVectorSDNode>(VecShuffle)->getMask();
9743  }
9744 
9745  // Ensure that the RHS is a vector of constants.
9746  BuildVectorSDNode *BVN = dyn_cast<BuildVectorSDNode>(RHS.getNode());
9747  if (!BVN)
9748  return SDValue();
9749 
9750  // Check if RHS is a splat of 4-bytes (or smaller).
9751  APInt APSplatValue, APSplatUndef;
9752  unsigned SplatBitSize;
9753  bool HasAnyUndefs;
9754  if (!BVN->isConstantSplat(APSplatValue, APSplatUndef, SplatBitSize,
9755  HasAnyUndefs, 0, !Subtarget.isLittleEndian()) ||
9756  SplatBitSize > 32)
9757  return SDValue();
9758 
9759  // Check that the shuffle mask matches the semantics of XXSPLTI32DX.
9760  // The instruction splats a constant C into two words of the source vector
9761  // producing { C, Unchanged, C, Unchanged } or { Unchanged, C, Unchanged, C }.
9762  // Thus we check that the shuffle mask is the equivalent of
9763  // <0, [4-7], 2, [4-7]> or <[4-7], 1, [4-7], 3> respectively.
9764  // Note: the check above of isNByteElemShuffleMask() ensures that the bytes
9765  // within each word are consecutive, so we only need to check the first byte.
9766  SDValue Index;
9767  bool IsLE = Subtarget.isLittleEndian();
9768  if ((ShuffleMask[0] == 0 && ShuffleMask[8] == 8) &&
9769  (ShuffleMask[4] % 4 == 0 && ShuffleMask[12] % 4 == 0 &&
9770  ShuffleMask[4] > 15 && ShuffleMask[12] > 15))
9771  Index = DAG.getTargetConstant(IsLE ? 0 : 1, DL, MVT::i32);
9772  else if ((ShuffleMask[4] == 4 && ShuffleMask[12] == 12) &&
9773  (ShuffleMask[0] % 4 == 0 && ShuffleMask[8] % 4 == 0 &&
9774  ShuffleMask[0] > 15 && ShuffleMask[8] > 15))
9775  Index = DAG.getTargetConstant(IsLE ? 1 : 0, DL, MVT::i32);
9776  else
9777  return SDValue();
9778 
9779  // If the splat is narrower than 32-bits, we need to get the 32-bit value
9780  // for XXSPLTI32DX.
9781  unsigned SplatVal = APSplatValue.getZExtValue();
9782  for (; SplatBitSize < 32; SplatBitSize <<= 1)
9783  SplatVal |= (SplatVal << SplatBitSize);
9784 
9785  SDValue SplatNode = DAG.getNode(
9787  Index, DAG.getTargetConstant(SplatVal, DL, MVT::i32));
9788  return DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, SplatNode);
9789 }
9790 
9791 /// LowerROTL - Custom lowering for ROTL(v1i128) to vector_shuffle(v16i8).
9792 /// We lower ROTL(v1i128) to vector_shuffle(v16i8) only if shift amount is
9793 /// a multiple of 8. Otherwise convert it to a scalar rotation(i128)
9794 /// i.e (or (shl x, C1), (srl x, 128-C1)).
9795 SDValue PPCTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
9796  assert(Op.getOpcode() == ISD::ROTL && "Should only be called for ISD::ROTL");
9797  assert(Op.getValueType() == MVT::v1i128 &&
9798  "Only set v1i128 as custom, other type shouldn't reach here!");
9799  SDLoc dl(Op);
9800  SDValue N0 = peekThroughBitcasts(Op.getOperand(0));
9801  SDValue N1 = peekThroughBitcasts(Op.getOperand(1));
9802  unsigned SHLAmt = N1.getConstantOperandVal(0);
9803  if (SHLAmt % 8 == 0) {
9804  SmallVector<int, 16> Mask(16, 0);
9805  std::iota(Mask.begin(), Mask.end(), 0);
9806  std::rotate(Mask.begin(), Mask.begin() + SHLAmt / 8, Mask.end());
9807  if (SDValue Shuffle =
9809  DAG.getUNDEF(MVT::v16i8), Mask))
9810  return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, Shuffle);
9811  }
9812  SDValue ArgVal = DAG.getBitcast(MVT::i128, N0);
9813  SDValue SHLOp = DAG.getNode(ISD::SHL, dl, MVT::i128, ArgVal,
9814  DAG.getConstant(SHLAmt, dl, MVT::i32));
9815  SDValue SRLOp = DAG.getNode(ISD::SRL, dl, MVT::i128, ArgVal,
9816  DAG.getConstant(128 - SHLAmt, dl, MVT::i32));
9817  SDValue OROp = DAG.getNode(ISD::OR, dl, MVT::i128, SHLOp, SRLOp);
9818  return DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, OROp);
9819 }
9820 
9821 /// LowerVECTOR_SHUFFLE - Return the code we lower for VECTOR_SHUFFLE. If this
9822 /// is a shuffle we can handle in a single instruction, return it. Otherwise,
9823 /// return the code it can be lowered into. Worst case, it can always be
9824 /// lowered into a vperm.
9825 SDValue PPCTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
9826  SelectionDAG &DAG) const {
9827  SDLoc dl(Op);
9828  SDValue V1 = Op.getOperand(0);
9829  SDValue V2 = Op.getOperand(1);
9830  ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
9831 
9832  // Any nodes that were combined in the target-independent combiner prior
9833  // to vector legalization will not be sent to the target combine. Try to
9834  // combine it here.
9835  if (SDValue NewShuffle = combineVectorShuffle(SVOp, DAG)) {
9836  if (!isa<ShuffleVectorSDNode>(NewShuffle))
9837  return NewShuffle;
9838  Op = NewShuffle;
9839  SVOp = cast<ShuffleVectorSDNode>(Op);
9840  V1 = Op.getOperand(0);
9841  V2 = Op.getOperand(1);
9842  }
9843  EVT VT = Op.getValueType();
9844  bool isLittleEndian = Subtarget.isLittleEndian();
9845 
9846  unsigned ShiftElts, InsertAtByte;
9847  bool Swap = false;
9848 
9849  // If this is a load-and-splat, we can do that with a single instruction
9850  // in some cases. However if the load has multiple uses, we don't want to
9851  // combine it because that will just produce multiple loads.
9852  bool IsPermutedLoad = false;
9853  const SDValue *InputLoad = getNormalLoadInput(V1, IsPermutedLoad);
9854  if (InputLoad && Subtarget.hasVSX() && V2.isUndef() &&
9855  (PPC::isSplatShuffleMask(SVOp, 4) || PPC::isSplatShuffleMask(SVOp, 8)) &&
9856  InputLoad->hasOneUse()) {
9857  bool IsFourByte = PPC::isSplatShuffleMask(SVOp, 4);
9858  int SplatIdx =
9859  PPC::getSplatIdxForPPCMnemonics(SVOp, IsFourByte ? 4 : 8, DAG);
9860 
9861  // The splat index for permuted loads will be in the left half of the vector
9862  // which is strictly wider than the loaded value by 8 bytes. So we need to
9863  // adjust the splat index to point to the correct address in memory.
9864  if (IsPermutedLoad) {
9865  assert((isLittleEndian || IsFourByte) &&
9866  "Unexpected size for permuted load on big endian target");
9867  SplatIdx += IsFourByte ? 2 : 1;
9868  assert((SplatIdx < (IsFourByte ? 4 : 2)) &&
9869  "Splat of a value outside of the loaded memory");
9870  }
9871 
9872  LoadSDNode *LD = cast<LoadSDNode>(*InputLoad);
9873  // For 4-byte load-and-splat, we need Power9.
9874  if ((IsFourByte && Subtarget.hasP9Vector()) || !IsFourByte) {
9875  uint64_t Offset = 0;
9876  if (IsFourByte)
9877  Offset = isLittleEndian ? (3 - SplatIdx) * 4 : SplatIdx * 4;
9878  else
9879  Offset = isLittleEndian ? (1 - SplatIdx) * 8 : SplatIdx * 8;
9880 
9881  // If the width of the load is the same as the width of the splat,
9882  // loading with an offset would load the wrong memory.
9883  if (LD->getValueType(0).getSizeInBits() == (IsFourByte ? 32 : 64))
9884  Offset = 0;
9885 
9886  SDValue BasePtr = LD->getBasePtr();
9887  if (Offset != 0)
9889  BasePtr, DAG.getIntPtrConstant(Offset, dl));
9890  SDValue Ops[] = {
9891  LD->getChain(), // Chain
9892  BasePtr, // BasePtr
9893  DAG.getValueType(Op.getValueType()) // VT
9894  };
9895  SDVTList VTL =
9896  DAG.getVTList(IsFourByte ? MVT::v4i32 : MVT::v2i64, MVT::Other);
9897  SDValue LdSplt =
9899  Ops, LD->getMemoryVT(), LD->getMemOperand());
9900  DAG.ReplaceAllUsesOfValueWith(InputLoad->getValue(1), LdSplt.getValue(1));
9901  if (LdSplt.getValueType() != SVOp->getValueType(0))
9902  LdSplt = DAG.getBitcast(SVOp->getValueType(0), LdSplt);
9903  return LdSplt;
9904  }
9905  }
9906  if (Subtarget.hasP9Vector() &&
9907  PPC::isXXINSERTWMask(SVOp, ShiftElts, InsertAtByte, Swap,
9908  isLittleEndian)) {
9909  if (Swap)
9910  std::swap(V1, V2);
9911  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9912  SDValue Conv2 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2);
9913  if (ShiftElts) {
9914  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv2, Conv2,
9915  DAG.getConstant(ShiftElts, dl, MVT::i32));
9916  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Shl,
9917  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9918  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9919  }
9920  SDValue Ins = DAG.getNode(PPCISD::VECINSERT, dl, MVT::v4i32, Conv1, Conv2,
9921  DAG.getConstant(InsertAtByte, dl, MVT::i32));
9922  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Ins);
9923  }
9924 
9925  if (Subtarget.hasPrefixInstrs()) {
9926  SDValue SplatInsertNode;
9927  if ((SplatInsertNode = lowerToXXSPLTI32DX(SVOp, DAG)))
9928  return SplatInsertNode;
9929  }
9930 
9931  if (Subtarget.hasP9Altivec()) {
9932  SDValue NewISDNode;
9933  if ((NewISDNode = lowerToVINSERTH(SVOp, DAG)))
9934  return NewISDNode;
9935 
9936  if ((NewISDNode = lowerToVINSERTB(SVOp, DAG)))
9937  return NewISDNode;
9938  }
9939 
9940  if (Subtarget.hasVSX() &&
9941  PPC::isXXSLDWIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9942  if (Swap)
9943  std::swap(V1, V2);
9944  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9945  SDValue Conv2 =
9946  DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V2.isUndef() ? V1 : V2);
9947 
9948  SDValue Shl = DAG.getNode(PPCISD::VECSHL, dl, MVT::v4i32, Conv1, Conv2,
9949  DAG.getConstant(ShiftElts, dl, MVT::i32));
9950  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Shl);
9951  }
9952 
9953  if (Subtarget.hasVSX() &&
9954  PPC::isXXPERMDIShuffleMask(SVOp, ShiftElts, Swap, isLittleEndian)) {
9955  if (Swap)
9956  std::swap(V1, V2);
9957  SDValue Conv1 = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9958  SDValue Conv2 =
9959  DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V2.isUndef() ? V1 : V2);
9960 
9961  SDValue PermDI = DAG.getNode(PPCISD::XXPERMDI, dl, MVT::v2i64, Conv1, Conv2,
9962  DAG.getConstant(ShiftElts, dl, MVT::i32));
9963  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, PermDI);
9964  }
9965 
9966  if (Subtarget.hasP9Vector()) {
9967  if (PPC::isXXBRHShuffleMask(SVOp)) {
9968  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, V1);
9969  SDValue ReveHWord = DAG.getNode(ISD::BSWAP, dl, MVT::v8i16, Conv);
9970  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveHWord);
9971  } else if (PPC::isXXBRWShuffleMask(SVOp)) {
9972  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9973  SDValue ReveWord = DAG.getNode(ISD::BSWAP, dl, MVT::v4i32, Conv);
9974  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveWord);
9975  } else if (PPC::isXXBRDShuffleMask(SVOp)) {
9976  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2i64, V1);
9977  SDValue ReveDWord = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Conv);
9978  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveDWord);
9979  } else if (PPC::isXXBRQShuffleMask(SVOp)) {
9980  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v1i128, V1);
9981  SDValue ReveQWord = DAG.getNode(ISD::BSWAP, dl, MVT::v1i128, Conv);
9982  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, ReveQWord);
9983  }
9984  }
9985 
9986  if (Subtarget.hasVSX()) {
9987  if (V2.isUndef() && PPC::isSplatShuffleMask(SVOp, 4)) {
9988  int SplatIdx = PPC::getSplatIdxForPPCMnemonics(SVOp, 4, DAG);
9989 
9990  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v4i32, V1);
9991  SDValue Splat = DAG.getNode(PPCISD::XXSPLT, dl, MVT::v4i32, Conv,
9992  DAG.getConstant(SplatIdx, dl, MVT::i32));
9993  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Splat);
9994  }
9995 
9996  // Left shifts of 8 bytes are actually swaps. Convert accordingly.
9997  if (V2.isUndef() && PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) == 8) {
9998  SDValue Conv = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, V1);
9999  SDValue Swap = DAG.getNode(PPCISD::SWAP_NO_CHAIN, dl, MVT::v2f64, Conv);
10000  return DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, Swap);
10001  }
10002  }
10003 
10004  // Cases that are handled by instructions that take permute immediates
10005  // (such as vsplt*) should be left as VECTOR_SHUFFLE nodes so they can be
10006  // selected by the instruction selector.
10007  if (V2.isUndef()) {
10008  if (PPC::isSplatShuffleMask(SVOp, 1) ||
10009  PPC::isSplatShuffleMask(SVOp, 2) ||
10010  PPC::isSplatShuffleMask(SVOp, 4) ||
10011  PPC::isVPKUWUMShuffleMask(SVOp, 1, DAG) ||
10012  PPC::isVPKUHUMShuffleMask(SVOp, 1, DAG) ||
10013  PPC::isVSLDOIShuffleMask(SVOp, 1, DAG) != -1 ||
10014  PPC::isVMRGLShuffleMask(SVOp, 1, 1, DAG) ||
10015  PPC::isVMRGLShuffleMask(SVOp, 2, 1, DAG) ||
10016  PPC::isVMRGLShuffleMask(SVOp, 4, 1, DAG) ||
10017  PPC::isVMRGHShuffleMask(SVOp, 1, 1, DAG) ||
10018  PPC::isVMRGHShuffleMask(SVOp, 2, 1, DAG) ||
10019  PPC::isVMRGHShuffleMask(SVOp, 4, 1, DAG) ||
10020  (Subtarget.hasP8Altivec() && (
10021  PPC::isVPKUDUMShuffleMask(SVOp, 1, DAG) ||
10022  PPC::isVMRGEOShuffleMask(SVOp, true, 1, DAG) ||
10023  PPC::isVMRGEOShuffleMask(SVOp, false, 1, DAG)))) {
10024  return Op;
10025  }
10026  }
10027 
10028  // Altivec has a variety of "shuffle immediates" that take two vector inputs
10029  // and produce a fixed permutation. If any of these match, do not lower to
10030  // VPERM.
10031  unsigned int ShuffleKind = isLittleEndian ? 2 : 0;
10032  if (PPC::isVPKUWUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10033  PPC::isVPKUHUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10034  PPC::isVSLDOIShuffleMask(SVOp, ShuffleKind, DAG) != -1 ||
10035  PPC::isVMRGLShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10036  PPC::isVMRGLShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10037  PPC::isVMRGLShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10038  PPC::isVMRGHShuffleMask(SVOp, 1, ShuffleKind, DAG) ||
10039  PPC::isVMRGHShuffleMask(SVOp, 2, ShuffleKind, DAG) ||
10040  PPC::isVMRGHShuffleMask(SVOp, 4, ShuffleKind, DAG) ||
10041  (Subtarget.hasP8Altivec() && (
10042  PPC::isVPKUDUMShuffleMask(SVOp, ShuffleKind, DAG) ||
10043  PPC::isVMRGEOShuffleMask(SVOp, true, ShuffleKind, DAG) ||
10044  PPC::isVMRGEOShuffleMask(SVOp, false, ShuffleKind, DAG))))
10045  return Op;
10046 
10047  // Check to see if this is a shuffle of 4-byte values. If so, we can use our
10048  // perfect shuffle table to emit an optimal matching sequence.
10049  ArrayRef<int> PermMask = SVOp->getMask();
10050 
10051  unsigned PFIndexes[4];
10052  bool isFourElementShuffle = true;
10053  for (unsigned i = 0; i != 4 && isFourElementShuffle; ++i) { // Element number
10054  unsigned EltNo = 8; // Start out undef.
10055  for (unsigned j = 0; j != 4; ++j) { // Intra-element byte.
10056  if (PermMask[i*4+j] < 0)
10057  continue; // Undef, ignore it.
10058 
10059  unsigned ByteSource = PermMask[i*4+j];
10060  if ((ByteSource & 3) != j) {
10061  isFourElementShuffle = false;
10062  break;
10063  }
10064 
10065  if (EltNo == 8) {
10066  EltNo = ByteSource/4;
10067  } else if (EltNo != ByteSource/4) {
10068  isFourElementShuffle = false;
10069  break;
10070  }
10071  }
10072  PFIndexes[i] = EltNo;
10073  }
10074 
10075  // If this shuffle can be expressed as a shuffle of 4-byte elements, use the
10076  // perfect shuffle vector to determine if it is cost effective to do this as
10077  // discrete instructions, or whether we should use a vperm.
10078  // For now, we skip this for little endian until such time as we have a
10079  // little-endian perfect shuffle table.
10080  if (isFourElementShuffle && !isLittleEndian) {
10081  // Compute the index in the perfect shuffle table.
10082  unsigned PFTableIndex =
10083  PFIndexes[0]*9*9*9+PFIndexes[1]*9*9+PFIndexes[2]*9+PFIndexes[3];
10084 
10085  unsigned PFEntry = PerfectShuffleTable[PFTableIndex];
10086  unsigned Cost = (PFEntry >> 30);
10087 
10088  // Determining when to avoid vperm is tricky. Many things affect the cost
10089  // of vperm, particularly how many times the perm mask needs to be computed.
10090  // For example, if the perm mask can be hoisted out of a loop or is already
10091  // used (perhaps because there are multiple permutes with the same shuffle
10092  // mask?) the vperm has a cost of 1. OTOH, hoisting the permute mask out of
10093  // the loop requires an extra register.
10094  //
10095  // As a compromise, we only emit discrete instructions if the shuffle can be
10096  // generated in 3 or fewer operations. When we have loop information
10097  // available, if this block is within a loop, we should avoid using vperm
10098  // for 3-operation perms and use a constant pool load instead.
10099  if (Cost < 3)
10100  return GeneratePerfectShuffle(PFEntry, V1, V2, DAG, dl);
10101  }
10102 
10103  // Lower this to a VPERM(V1, V2, V3) expression, where V3 is a constant
10104  // vector that will get spilled to the constant pool.
10105  if (V2.isUndef()) V2 = V1;
10106 
10107  // The SHUFFLE_VECTOR mask is almost exactly what we want for vperm, except
10108  // that it is in input element units, not in bytes. Convert now.
10109 
10110  // For little endian, the order of the input vectors is reversed, and
10111  // the permutation mask is complemented with respect to 31. This is
10112  // necessary to produce proper semantics with the big-endian-biased vperm
10113  // instruction.
10114  EVT EltVT = V1.getValueType().getVectorElementType();
10115  unsigned BytesPerElement = EltVT.getSizeInBits()/8;
10116 
10117  SmallVector<SDValue, 16> ResultMask;
10118  for (unsigned i = 0, e = VT.getVectorNumElements(); i != e; ++i) {
10119  unsigned SrcElt = PermMask[i] < 0 ? 0 : PermMask[i];
10120 
10121  for (unsigned j = 0; j != BytesPerElement; ++j)
10122  if (isLittleEndian)
10123  ResultMask.push_back(DAG.getConstant(31 - (SrcElt*BytesPerElement + j),
10124  dl, MVT::i32));
10125  else
10126  ResultMask.push_back(DAG.getConstant(SrcElt*BytesPerElement + j, dl,
10127  MVT::i32));
10128  }
10129 
10130  ShufflesHandledWithVPERM++;
10131  SDValue VPermMask = DAG.getBuildVector(MVT::v16i8, dl, ResultMask);
10132  LLVM_DEBUG(dbgs() << "Emitting a VPERM for the following shuffle:\n");
10133  LLVM_DEBUG(SVOp->dump());
10134  LLVM_DEBUG(dbgs() << "With the following permute control vector:\n");
10135  LLVM_DEBUG(VPermMask.dump());
10136 
10137  if (isLittleEndian)
10138  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10139  V2, V1, VPermMask);
10140  else
10141  return DAG.getNode(PPCISD::VPERM, dl, V1.getValueType(),
10142  V1, V2, VPermMask);
10143 }
10144 
10145 /// getVectorCompareInfo - Given an intrinsic, return false if it is not a
10146 /// vector comparison. If it is, return true and fill in Opc/isDot with
10147 /// information about the intrinsic.
10148 static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc,
10149  bool &isDot, const PPCSubtarget &Subtarget) {
10150  unsigned IntrinsicID =
10151  cast<ConstantSDNode>(Intrin.getOperand(0))->getZExtValue();
10152  CompareOpc = -1;
10153  isDot = false;
10154  switch (IntrinsicID) {
10155  default:
10156  return false;
10157  // Comparison predicates.
10158  case Intrinsic::ppc_altivec_vcmpbfp_p:
10159  CompareOpc = 966;
10160  isDot = true;
10161  break;
10162  case Intrinsic::ppc_altivec_vcmpeqfp_p:
10163  CompareOpc = 198;
10164  isDot = true;
10165  break;
10166  case Intrinsic::ppc_altivec_vcmpequb_p:
10167  CompareOpc = 6;
10168  isDot = true;
10169  break;
10170  case Intrinsic::ppc_altivec_vcmpequh_p:
10171  CompareOpc = 70;
10172  isDot = true;
10173  break;
10174  case Intrinsic::ppc_altivec_vcmpequw_p:
10175  CompareOpc = 134;
10176  isDot = true;
10177  break;
10178  case Intrinsic::ppc_altivec_vcmpequd_p:
10179  if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10180  CompareOpc = 199;
10181  isDot = true;
10182  } else
10183  return false;
10184  break;
10185  case Intrinsic::ppc_altivec_vcmpneb_p:
10186  case Intrinsic::ppc_altivec_vcmpneh_p:
10187  case Intrinsic::ppc_altivec_vcmpnew_p:
10188  case Intrinsic::ppc_altivec_vcmpnezb_p:
10189  case Intrinsic::ppc_altivec_vcmpnezh_p:
10190  case Intrinsic::ppc_altivec_vcmpnezw_p:
10191  if (Subtarget.hasP9Altivec()) {
10192  switch (IntrinsicID) {
10193  default:
10194  llvm_unreachable("Unknown comparison intrinsic.");
10195  case Intrinsic::ppc_altivec_vcmpneb_p:
10196  CompareOpc = 7;
10197  break;
10198  case Intrinsic::ppc_altivec_vcmpneh_p:
10199  CompareOpc = 71;
10200  break;
10201  case Intrinsic::ppc_altivec_vcmpnew_p:
10202  CompareOpc = 135;
10203  break;
10204  case Intrinsic::ppc_altivec_vcmpnezb_p:
10205  CompareOpc = 263;
10206  break;
10207  case Intrinsic::ppc_altivec_vcmpnezh_p:
10208  CompareOpc = 327;
10209  break;
10210  case Intrinsic::ppc_altivec_vcmpnezw_p:
10211  CompareOpc = 391;
10212  break;
10213  }
10214  isDot = true;
10215  } else
10216  return false;
10217  break;
10218  case Intrinsic::ppc_altivec_vcmpgefp_p:
10219  CompareOpc = 454;
10220  isDot = true;
10221  break;
10222  case Intrinsic::ppc_altivec_vcmpgtfp_p:
10223  CompareOpc = 710;
10224  isDot = true;
10225  break;
10226  case Intrinsic::ppc_altivec_vcmpgtsb_p:
10227  CompareOpc = 774;
10228  isDot = true;
10229  break;
10230  case Intrinsic::ppc_altivec_vcmpgtsh_p:
10231  CompareOpc = 838;
10232  isDot = true;
10233  break;
10234  case Intrinsic::ppc_altivec_vcmpgtsw_p:
10235  CompareOpc = 902;
10236  isDot = true;
10237  break;
10238  case Intrinsic::ppc_altivec_vcmpgtsd_p:
10239  if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10240  CompareOpc = 967;
10241  isDot = true;
10242  } else
10243  return false;
10244  break;
10245  case Intrinsic::ppc_altivec_vcmpgtub_p:
10246  CompareOpc = 518;
10247  isDot = true;
10248  break;
10249  case Intrinsic::ppc_altivec_vcmpgtuh_p:
10250  CompareOpc = 582;
10251  isDot = true;
10252  break;
10253  case Intrinsic::ppc_altivec_vcmpgtuw_p:
10254  CompareOpc = 646;
10255  isDot = true;
10256  break;
10257  case Intrinsic::ppc_altivec_vcmpgtud_p:
10258  if (Subtarget.hasVSX() || Subtarget.hasP8Altivec()) {
10259  CompareOpc = 711;
10260  isDot = true;
10261  } else
10262  return false;
10263  break;
10264 
10265  case Intrinsic::ppc_altivec_vcmpequq:
10266  case Intrinsic::ppc_altivec_vcmpgtsq:
10267  case Intrinsic::ppc_altivec_vcmpgtuq:
10268  if (!Subtarget.isISA3_1())
10269  return false;
10270  switch (IntrinsicID) {
10271  default:
10272  llvm_unreachable("Unknown comparison intrinsic.");
10273  case Intrinsic::ppc_altivec_vcmpequq:
10274  CompareOpc = 455;
10275  break;
10276  case Intrinsic::ppc_altivec_vcmpgtsq:
10277  CompareOpc = 903;
10278  break;
10279  case Intrinsic::ppc_altivec_vcmpgtuq:
10280  CompareOpc = 647;
10281  break;
10282  }
10283  break;
10284 
10285  // VSX predicate comparisons use the same infrastructure
10286  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10287  case Intrinsic::ppc_vsx_xvcmpgedp_p:
10288  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10289  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10290  case Intrinsic::ppc_vsx_xvcmpgesp_p:
10291  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10292  if (Subtarget.hasVSX()) {
10293  switch (IntrinsicID) {
10294  case Intrinsic::ppc_vsx_xvcmpeqdp_p:
10295  CompareOpc = 99;
10296  break;
10297  case Intrinsic::ppc_vsx_xvcmpgedp_p:
10298  CompareOpc = 115;
10299  break;
10300  case Intrinsic::ppc_vsx_xvcmpgtdp_p:
10301  CompareOpc = 107;
10302  break;
10303  case Intrinsic::ppc_vsx_xvcmpeqsp_p:
10304  CompareOpc = 67;
10305  break;
10306  case Intrinsic::ppc_vsx_xvcmpgesp_p:
10307  CompareOpc = 83;
10308  break;
10309  case Intrinsic::ppc_vsx_xvcmpgtsp_p:
10310  CompareOpc = 75;
10311  break;
10312  }
10313  isDot = true;
10314  } else
10315  return false;
10316  break;
10317 
10318  // Normal Comparisons.
10319  case Intrinsic::ppc_altivec_vcmpbfp:
10320  CompareOpc = 966;
10321  break;
10322  case Intrinsic::ppc_altivec_vcmpeqfp:
10323  CompareOpc = 198;
10324  break;
10325  case Intrinsic::ppc_altivec_vcmpequb:
10326  CompareOpc = 6;
10327  break;
10328  case Intrinsic::ppc_altivec_vcmpequh:
10329  CompareOpc = 70;
10330  break;
10331  case Intrinsic::ppc_altivec_vcmpequw:
10332  CompareOpc = 134;
10333  break;
10334  case Intrinsic::ppc_altivec_vcmpequd:
10335  if (Subtarget.hasP8Altivec())
10336  CompareOpc = 199;
10337  else
10338  return false;
10339  break;
10340  case Intrinsic::ppc_altivec_vcmpneb:
10341  case Intrinsic::ppc_altivec_vcmpneh:
10342  case Intrinsic::ppc_altivec_vcmpnew:
10343  case Intrinsic::ppc_altivec_vcmpnezb:
10344  case Intrinsic::ppc_altivec_vcmpnezh:
10345  case Intrinsic::ppc_altivec_vcmpnezw:
10346  if (Subtarget.hasP9Altivec())
10347  switch (IntrinsicID) {
10348  default:
10349  llvm_unreachable("Unknown comparison intrinsic.");
10350  case Intrinsic::ppc_altivec_vcmpneb:
10351  CompareOpc = 7;
10352  break;
10353  case Intrinsic::ppc_altivec_vcmpneh:
10354  CompareOpc = 71;
10355  break;
10356  case Intrinsic::ppc_altivec_vcmpnew:
10357  CompareOpc = 135;
10358  break;
10359  case Intrinsic::ppc_altivec_vcmpnezb:
10360  CompareOpc = 263;
10361  break;
10362  case Intrinsic::ppc_altivec_vcmpnezh:
10363  CompareOpc = 327;
10364  break;
10365  case Intrinsic::ppc_altivec_vcmpnezw:
10366  CompareOpc = 391;
10367  break;
10368  }
10369  else
10370  return false;
10371  break;
10372  case Intrinsic::ppc_altivec_vcmpgefp:
10373  CompareOpc = 454;
10374  break;
10375  case Intrinsic::ppc_altivec_vcmpgtfp:
10376  CompareOpc = 710;
10377  break;
10378  case Intrinsic::ppc_altivec_vcmpgtsb:
10379  CompareOpc = 774;
10380  break;
10381  case Intrinsic::ppc_altivec_vcmpgtsh:
10382  CompareOpc = 838;
10383  break;
10384  case Intrinsic::ppc_altivec_vcmpgtsw:
10385  CompareOpc = 902;
10386  break;
10387  case Intrinsic::ppc_altivec_vcmpgtsd:
10388  if (Subtarget.hasP8Altivec())
10389  CompareOpc = 967;
10390  else
10391  return false;
10392  break;
10393  case Intrinsic::ppc_altivec_vcmpgtub:
10394  CompareOpc = 518;
10395  break;
10396  case Intrinsic::ppc_altivec_vcmpgtuh:
10397  CompareOpc = 582;
10398  break;
10399  case Intrinsic::ppc_altivec_vcmpgtuw:
10400  CompareOpc = 646;
10401  break;
10402  case Intrinsic::ppc_altivec_vcmpgtud:
10403  if (Subtarget.hasP8Altivec())
10404  CompareOpc = 711;
10405  else
10406  return false;
10407  break;
10408  case Intrinsic::ppc_altivec_vcmpequq_p:
10409  case Intrinsic::ppc_altivec_vcmpgtsq_p:
10410  case Intrinsic::ppc_altivec_vcmpgtuq_p:
10411  if (!Subtarget.isISA3_1())
10412  return false;
10413  switch (IntrinsicID) {
10414  default:
10415  llvm_unreachable("Unknown comparison intrinsic.");
10416  case Intrinsic::ppc_altivec_vcmpequq_p:
10417  CompareOpc = 455;
10418  break;
10419  case Intrinsic::ppc_altivec_vcmpgtsq_p:
10420  CompareOpc = 903;
10421  break;
10422  case Intrinsic::ppc_altivec_vcmpgtuq_p:
10423  CompareOpc = 647;
10424  break;
10425  }
10426  isDot = true;
10427  break;
10428  }
10429  return true;
10430 }
10431 
/// LowerINTRINSIC_WO_CHAIN - If this is an intrinsic that we want to custom
/// lower, do it, otherwise return null.
SDValue PPCTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                   SelectionDAG &DAG) const {
  // Operand 0 of INTRINSIC_WO_CHAIN is the intrinsic ID constant.
  unsigned IntrinsicID =
    cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  SDLoc dl(Op);

  switch (IntrinsicID) {
  case Intrinsic::thread_pointer:
    // Reads the thread pointer register, used for __builtin_thread_pointer.
    if (Subtarget.isPPC64())
      return DAG.getRegister(PPC::X13, MVT::i64);
    return DAG.getRegister(PPC::R2, MVT::i32);

  case Intrinsic::ppc_mma_disassemble_acc:
  case Intrinsic::ppc_vsx_disassemble_pair: {
    // Split a wide accumulator/pair register into its component v16i8
    // vectors, returned as a merge of NumVecs values.
    int NumVecs = 2;
    SDValue WideVec = Op.getOperand(1);
    if (IntrinsicID == Intrinsic::ppc_mma_disassemble_acc) {
      // Accumulators hold four vectors and must be moved out of the
      // accumulator state (XXMFACC) before the lanes can be extracted.
      NumVecs = 4;
      WideVec = DAG.getNode(PPCISD::XXMFACC, dl, MVT::v512i1, WideVec);
    }
    SmallVector<SDValue, 4> RetOps;
    for (int VecNo = 0; VecNo < NumVecs; VecNo++) {
      // On little endian the register order is reversed, so index from the
      // other end.
      SDValue Extract = DAG.getNode(
          PPCISD::EXTRACT_VSX_REG, dl, MVT::v16i8, WideVec,
          DAG.getConstant(Subtarget.isLittleEndian() ? NumVecs - 1 - VecNo
                                                     : VecNo,
                          dl, getPointerTy(DAG.getDataLayout())));
      RetOps.push_back(Extract);
    }
    return DAG.getMergeValues(RetOps, dl);
  }

  case Intrinsic::ppc_unpack_longdouble: {
    // Extract one of the two f64 halves of a ppc_fp128 value; the index
    // operand selects which half.
    auto *Idx = dyn_cast<ConstantSDNode>(Op.getOperand(2));
    assert(Idx && (Idx->getSExtValue() == 0 || Idx->getSExtValue() == 1) &&
           "Argument of long double unpack must be 0 or 1!");
    return DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Op.getOperand(1),
                       DAG.getConstant(!!(Idx->getSExtValue()), dl,
                                       Idx->getValueType(0)));
  }

  case Intrinsic::ppc_compare_exp_lt:
  case Intrinsic::ppc_compare_exp_gt:
  case Intrinsic::ppc_compare_exp_eq:
  case Intrinsic::ppc_compare_exp_uo: {
    // Map the intrinsic to the CR predicate tested after XSCMPEXPDP.
    unsigned Pred;
    switch (IntrinsicID) {
    case Intrinsic::ppc_compare_exp_lt:
      Pred = PPC::PRED_LT;
      break;
    case Intrinsic::ppc_compare_exp_gt:
      Pred = PPC::PRED_GT;
      break;
    case Intrinsic::ppc_compare_exp_eq:
      Pred = PPC::PRED_EQ;
      break;
    case Intrinsic::ppc_compare_exp_uo:
      Pred = PPC::PRED_UN;
      break;
    }
    // Compare the exponents (XSCMPEXPDP), then select 1 or 0 based on the
    // chosen predicate via SELECT_CC_I4.
    return SDValue(
        DAG.getMachineNode(
            PPC::SELECT_CC_I4, dl, MVT::i32,
            {SDValue(DAG.getMachineNode(PPC::XSCMPEXPDP, dl, MVT::i32,
                                        Op.getOperand(1), Op.getOperand(2)),
                     0),
             DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
             DAG.getTargetConstant(Pred, dl, MVT::i32)}),
        0);
  }
  case Intrinsic::ppc_test_data_class_d:
  case Intrinsic::ppc_test_data_class_f: {
    // Test the FP data class (XSTSTDC[DS]P) and select 1 on an EQ match,
    // 0 otherwise.
    unsigned CmprOpc = PPC::XSTSTDCDP;
    if (IntrinsicID == Intrinsic::ppc_test_data_class_f)
      CmprOpc = PPC::XSTSTDCSP;
    return SDValue(
        DAG.getMachineNode(
            PPC::SELECT_CC_I4, dl, MVT::i32,
            {SDValue(DAG.getMachineNode(CmprOpc, dl, MVT::i32, Op.getOperand(2),
                                        Op.getOperand(1)),
                     0),
             DAG.getConstant(1, dl, MVT::i32), DAG.getConstant(0, dl, MVT::i32),
             DAG.getTargetConstant(PPC::PRED_EQ, dl, MVT::i32)}),
        0);
  }
  case Intrinsic::ppc_convert_f128_to_ppcf128:
  case Intrinsic::ppc_convert_ppcf128_to_f128: {
    // Conversions between IEEE f128 and ppc_fp128 go through a libcall.
    RTLIB::Libcall LC = IntrinsicID == Intrinsic::ppc_convert_ppcf128_to_f128
                            ? RTLIB::CONVERT_PPCF128_F128
                            : RTLIB::CONVERT_F128_PPCF128;
    MakeLibCallOptions CallOptions;
    std::pair<SDValue, SDValue> Result =
        makeLibCall(DAG, LC, Op.getValueType(), Op.getOperand(1), CallOptions,
                    dl, SDValue());
    return Result.first;
  }
  }

  // If this is a lowered altivec predicate compare, CompareOpc is set to the
  // opcode number of the comparison.
  int CompareOpc;
  bool isDot;
  if (!getVectorCompareInfo(Op, CompareOpc, isDot, Subtarget))
    return SDValue();    // Don't custom lower most intrinsics.

  // If this is a non-dot comparison, make the VCMP node and we are done.
  if (!isDot) {
    SDValue Tmp = DAG.getNode(PPCISD::VCMP, dl, Op.getOperand(2).getValueType(),
                              Op.getOperand(1), Op.getOperand(2),
                              DAG.getConstant(CompareOpc, dl, MVT::i32));
    return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Tmp);
  }

  // Create the PPCISD altivec 'dot' comparison node.
  // For the dot (record) form, operand 1 of the intrinsic selects which CR6
  // bit to return, and the vector operands follow at 2 and 3.
  SDValue Ops[] = {
    Op.getOperand(2),  // LHS
    Op.getOperand(3),  // RHS
    DAG.getConstant(CompareOpc, dl, MVT::i32)
  };
  EVT VTs[] = { Op.getOperand(2).getValueType(), MVT::Glue };
  SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);

  // Now that we have the comparison, emit a copy from the CR to a GPR.
  // This is flagged to the above dot comparison.
  SDValue Flags = DAG.getNode(PPCISD::MFOCRF, dl, MVT::i32,
                              DAG.getRegister(PPC::CR6, MVT::i32),
                              CompNode.getValue(1));

  // Unpack the result based on how the target uses it.
  unsigned BitNo;   // Bit # of CR6.
  bool InvertBit;   // Invert result?
  switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
  default:  // Can't happen, don't crash on invalid number though.
  case 0:   // Return the value of the EQ bit of CR6.
    BitNo = 0; InvertBit = false;
    break;
  case 1:   // Return the inverted value of the EQ bit of CR6.
    BitNo = 0; InvertBit = true;
    break;
  case 2:   // Return the value of the LT bit of CR6.
    BitNo = 2; InvertBit = false;
    break;
  case 3:   // Return the inverted value of the LT bit of CR6.
    BitNo = 2; InvertBit = true;
    break;
  }

  // Shift the bit into the low position.
  Flags = DAG.getNode(ISD::SRL, dl, MVT::i32, Flags,
                      DAG.getConstant(8 - (3 - BitNo), dl, MVT::i32));
  // Isolate the bit.
  Flags = DAG.getNode(ISD::AND, dl, MVT::i32, Flags,
                      DAG.getConstant(1, dl, MVT::i32));

  // If we are supposed to, toggle the bit.
  if (InvertBit)
    Flags = DAG.getNode(ISD::XOR, dl, MVT::i32, Flags,
                        DAG.getConstant(1, dl, MVT::i32));
  return Flags;
}
10596 
10597 SDValue PPCTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
10598  SelectionDAG &DAG) const {
10599  // SelectionDAGBuilder::visitTargetIntrinsic may insert one extra chain to
10600  // the beginning of the argument list.
10601  int ArgStart = isa<ConstantSDNode>(Op.getOperand(0)) ? 0 : 1;
10602  SDLoc DL(Op);
10603  switch (cast<ConstantSDNode>(Op.getOperand(ArgStart))->getZExtValue()) {
10604  case Intrinsic::ppc_cfence: {
10605  assert(ArgStart == 1 && "llvm.ppc.cfence must carry a chain argument.");
10606  assert(Subtarget.isPPC64() && "Only 64-bit is supported for now.");
10607  SDValue Val = Op.getOperand(ArgStart + 1);
10608  EVT Ty = Val.getValueType();
10609  if (Ty == MVT::i128) {
10610  // FIXME: Testing one of two paired registers is sufficient to guarantee
10611  // ordering?
10612  Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i64, Val);
10613  }
10614  return SDValue(
10615  DAG.getMachineNode(PPC::CFENCE8, DL, MVT::Other,
10616  DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Val),
10617  Op.getOperand(0)),
10618  0);
10619  }
10620  default:
10621  break;
10622  }
10623  return SDValue();
10624 }
10625 
10626 // Lower scalar BSWAP64 to xxbrd.
10627 SDValue PPCTargetLowering::LowerBSWAP(SDValue Op, SelectionDAG &DAG) const {
10628  SDLoc dl(Op);
10629  if (!Subtarget.isPPC64())
10630  return Op;
10631  // MTVSRDD
10632  Op = DAG.getNode(ISD::BUILD_VECTOR, dl, MVT::v2i64, Op.getOperand(0),
10633  Op.getOperand(0));
10634  // XXBRD
10635  Op = DAG.getNode(ISD::BSWAP, dl, MVT::v2i64, Op);
10636  // MFVSRD
10637  int VectorIndex = 0;
10638  if (Subtarget.isLittleEndian())
10639  VectorIndex = 1;
10641  DAG.getTargetConstant(VectorIndex, dl, MVT::i32));
10642  return Op;
10643 }
10644 
10645 // ATOMIC_CMP_SWAP for i8/i16 needs to zero-extend its input since it will be
10646 // compared to a value that is atomically loaded (atomic loads zero-extend).
10647 SDValue PPCTargetLowering::LowerATOMIC_CMP_SWAP(SDValue Op,
10648  SelectionDAG &DAG) const {
10649  assert(Op.getOpcode() == ISD::ATOMIC_CMP_SWAP &&
10650  "Expecting an atomic compare-and-swap here.");
10651  SDLoc dl(Op);
10652  auto *AtomicNode = cast<AtomicSDNode>(Op.getNode());
10653  EVT MemVT = AtomicNode->getMemoryVT();
10654  if (MemVT.getSizeInBits() >= 32)
10655  return Op;
10656 
10657  SDValue CmpOp = Op.getOperand(2);
10658  // If this is already correctly zero-extended, leave it alone.
10659  auto HighBits = APInt::getHighBitsSet(32, 32 - MemVT.getSizeInBits());
10660  if (DAG.MaskedValueIsZero(CmpOp, HighBits))
10661  return Op;
10662 
10663  // Clear the high bits of the compare operand.
10664  unsigned MaskVal = (1 << MemVT.getSizeInBits()) - 1;
10665  SDValue NewCmpOp =
10666  DAG.getNode(ISD::AND, dl, MVT::i32, CmpOp,
10667  DAG.getConstant(MaskVal, dl, MVT::i32));
10668 
10669  // Replace the existing compare operand with the properly zero-extended one.
10671  for (int i = 0, e = AtomicNode->getNumOperands(); i < e; i++)
10672  Ops.push_back(AtomicNode->getOperand(i));
10673  Ops[2] = NewCmpOp;
10674  MachineMemOperand *MMO = AtomicNode->getMemOperand();
10675  SDVTList Tys = DAG.getVTList(MVT::i32, MVT::Other);
10676  auto NodeTy =
10678  return DAG.getMemIntrinsicNode(NodeTy, dl, Tys, Ops, MemVT, MMO);
10679 }
10680 
10681 SDValue PPCTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
10682  SelectionDAG &DAG) const {
10683  AtomicSDNode *N = cast<AtomicSDNode>(Op.getNode());
10684  EVT MemVT = N->getMemoryVT();
10685  assert(MemVT.getSimpleVT() == MVT::i128 &&
10686  "Expect quadword atomic operations");
10687  SDLoc dl(N);
10688  unsigned Opc = N->getOpcode();
10689  switch (Opc) {
10690  case ISD::ATOMIC_LOAD: {
10691  // Lower quadword atomic load to int_ppc_atomic_load_i128 which will be
10692  // lowered to ppc instructions by pattern matching instruction selector.
10695  N->getOperand(0),
10696  DAG.getConstant(Intrinsic::ppc_atomic_load_i128, dl, MVT::i32)};
10697  for (int I = 1, E = N->getNumOperands(); I < E; ++I)
10698  Ops.push_back(N->getOperand(I));
10699  SDValue LoadedVal = DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, dl, Tys,
10700  Ops, MemVT, N->getMemOperand());
10701  SDValue ValLo = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal);
10702  SDValue ValHi =
10703  DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i128, LoadedVal.getValue(1));
10704  ValHi = DAG.getNode(ISD::SHL, dl, MVT::i128, ValHi,
10705  DAG.getConstant(64, dl, MVT::i32));
10706  SDValue Val =
10707  DAG.getNode(ISD::OR, dl, {MVT::i128, MVT::Other}, {ValLo, ValHi});
10708  return DAG.getNode(ISD::MERGE_VALUES, dl, {MVT::i128, MVT::Other},
10709  {Val, LoadedVal.getValue(2)});
10710  }
10711  case ISD::ATOMIC_STORE: {
10712  // Lower quadword atomic store to int_ppc_atomic_store_i128 which will be
10713  // lowered to ppc instructions by pattern matching instruction selector.
10714  SDVTList Tys = DAG.getVTList(MVT::Other);
10716  N->getOperand(0),
10717  DAG.getConstant(Intrinsic::ppc_atomic_store_i128, dl, MVT::i32)};
10718  SDValue Val = N->getOperand(2);
10719  SDValue ValLo = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, Val);
10720  SDValue ValHi = DAG.getNode(ISD::SRL, dl, MVT::i128, Val,
10721  DAG.getConstant(64, dl, MVT::i32));
10722  ValHi = DAG.getNode(ISD::TRUNCATE, dl, MVT::i64, ValHi);
10723  Ops.push_back(ValLo);
10724  Ops.push_back(ValHi);
10725  Ops.push_back(N->getOperand(1));
10726  return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, dl, Tys, Ops, MemVT,
10727  N->getMemOperand());
10728  }
10729  default:
10730  llvm_unreachable("Unexpected atomic opcode");
10731  }
10732 }
10733 
10734 SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
10735  SelectionDAG &DAG) const {
10736  SDLoc dl(Op);
10737  // Create a stack slot that is 16-byte aligned.
10739  int FrameIdx = MFI.CreateStackObject(16, Align(16), false);
10740  EVT PtrVT = getPointerTy(DAG.getDataLayout());
10741  SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
10742 
10743  // Store the input value into Value#0 of the stack slot.
10744  SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0), FIdx,
10745  MachinePointerInfo());
10746  // Load it out.
10747  return DAG.getLoad(Op.getValueType(), dl, Store, FIdx, MachinePointerInfo());
10748 }
10749 
10750 SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
10751  SelectionDAG &DAG) const {
10752  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT &&
10753  "Should only be called for ISD::INSERT_VECTOR_ELT");
10754 
10755  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(2));
10756 
10757  EVT VT = Op.getValueType();
10758  SDLoc dl(Op);
10759  SDValue V1 = Op.getOperand(0);
10760  SDValue V2 = Op.getOperand(1);
10761 
10762  if (VT == MVT::v2f64 && C)
10763  return Op;
10764 
10765  if (Subtarget.hasP9Vector()) {
10766  // A f32 load feeding into a v4f32 insert_vector_elt is handled in this way
10767  // because on P10, it allows this specific insert_vector_elt load pattern to
10768  // utilize the refactored load and store infrastructure in order to exploit
10769  // prefixed loads.
10770  // On targets with inexpensive direct moves (Power9 and up), a
10771  // (insert_vector_elt v4f32:$vec, (f32 load)) is always better as an integer
10772  // load since a single precision load will involve conversion to double
10773  // precision on the load followed by another conversion to single precision.
10774  if ((VT == MVT::v4f32) && (V2.getValueType() == MVT::f32) &&
10775  (isa<LoadSDNode>(V2))) {
10776  SDValue BitcastVector = DAG.getBitcast(MVT::v4i32, V1);
10777  SDValue BitcastLoad = DAG.getBitcast(MVT::i32, V2);
10778  SDValue InsVecElt =
10779  DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v4i32, BitcastVector,
10780  BitcastLoad, Op.getOperand(2));
10781  return DAG.getBitcast(MVT::v4f32, InsVecElt);
10782  }
10783  }
10784 
10785  if (Subtarget.isISA3_1()) {
10786  if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
10787  return SDValue();
10788  // On P10, we have legal lowering for constant and variable indices for
10789  // all vectors.
10790  if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
10791  VT == MVT::v2i64 || VT == MVT::v4f32 || VT == MVT::v2f64)
10792  return Op;
10793  }
10794 
10795  // Before P10, we have legal lowering for constant indices but not for
10796  // variable ones.
10797  if (!C)
10798  return SDValue();
10799 
10800  // We can use MTVSRZ + VECINSERT for v8i16 and v16i8 types.
10801  if (VT == MVT::v8i16 || VT == MVT::v16i8) {
10802  SDValue Mtvsrz = DAG.getNode(PPCISD::MTVSRZ, dl, VT, V2);
10803  unsigned BytesInEachElement = VT.getVectorElementType().getSizeInBits() / 8;
10804  unsigned InsertAtElement = C->getZExtValue();
10805  unsigned InsertAtByte = InsertAtElement * BytesInEachElement;
10806  if (Subtarget.isLittleEndian()) {
10807  InsertAtByte = (16 - BytesInEachElement) - InsertAtByte;
10808  }
10809  return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, Mtvsrz,
10810  DAG.getConstant(InsertAtByte, dl, MVT::i32));
10811  }
10812  return Op;
10813 }
10814 
10815 SDValue PPCTargetLowering::LowerVectorLoad(SDValue Op,
10816  SelectionDAG &DAG) const {
10817  SDLoc dl(Op);
10818  LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
10819  SDValue LoadChain = LN->getChain();
10820  SDValue BasePtr = LN->getBasePtr();
10821  EVT VT = Op.getValueType();
10822 
10823  if (VT != MVT::v256i1 && VT != MVT::v512i1)
10824  return Op;
10825 
10826  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10827  // Here we create 2 or 4 v16i8 loads to load the pair or accumulator value in
10828  // 2 or 4 vsx registers.
10829  assert((VT != MVT::v512i1 || Subtarget.hasMMA()) &&
10830  "Type unsupported without MMA");
10831  assert((VT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10832  "Type unsupported without paired vector support");
10833  Align Alignment = LN->getAlign();
10835  SmallVector<SDValue, 4> LoadChains;
10836  unsigned NumVecs = VT.getSizeInBits() / 128;
10837  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10838  SDValue Load =
10839  DAG.getLoad(MVT::v16i8, dl, LoadChain, BasePtr,
10840  LN->getPointerInfo().getWithOffset(Idx * 16),
10841  commonAlignment(Alignment, Idx * 16),
10842  LN->getMemOperand()->getFlags(), LN->getAAInfo());
10843  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10844  DAG.getConstant(16, dl, BasePtr.getValueType()));
10845  Loads.push_back(Load);
10846  LoadChains.push_back(Load.getValue(1));
10847  }
10848  if (Subtarget.isLittleEndian()) {
10849  std::reverse(Loads.begin(), Loads.end());
10850  std::reverse(LoadChains.begin(), LoadChains.end());
10851  }
10852  SDValue TF = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, LoadChains);
10853  SDValue Value =
10855  dl, VT, Loads);
10856  SDValue RetOps[] = {Value, TF};
10857  return DAG.getMergeValues(RetOps, dl);
10858 }
10859 
10860 SDValue PPCTargetLowering::LowerVectorStore(SDValue Op,
10861  SelectionDAG &DAG) const {
10862  SDLoc dl(Op);
10863  StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
10864  SDValue StoreChain = SN->getChain();
10865  SDValue BasePtr = SN->getBasePtr();
10866  SDValue Value = SN->getValue();
10867  EVT StoreVT = Value.getValueType();
10868 
10869  if (StoreVT != MVT::v256i1 && StoreVT != MVT::v512i1)
10870  return Op;
10871 
10872  // Type v256i1 is used for pairs and v512i1 is used for accumulators.
10873  // Here we create 2 or 4 v16i8 stores to store the pair or accumulator
10874  // underlying registers individually.
10875  assert((StoreVT != MVT::v512i1 || Subtarget.hasMMA()) &&
10876  "Type unsupported without MMA");
10877  assert((StoreVT != MVT::v256i1 || Subtarget.pairedVectorMemops()) &&
10878  "Type unsupported without paired vector support");
10879  Align Alignment = SN->getAlign();
10880  SmallVector<SDValue, 4> Stores;
10881  unsigned NumVecs = 2;
10882  if (StoreVT == MVT::v512i1) {
10884  NumVecs = 4;
10885  }
10886  for (unsigned Idx = 0; Idx < NumVecs; ++Idx) {
10887  unsigned VecNum = Subtarget.isLittleEndian() ? NumVecs - 1 - Idx : Idx;
10889  DAG.getConstant(VecNum, dl, getPointerTy(DAG.getDataLayout())));
10890  SDValue Store =
10891  DAG.getStore(StoreChain, dl, Elt, BasePtr,
10892  SN->getPointerInfo().getWithOffset(Idx * 16),
10893  commonAlignment(Alignment, Idx * 16),
10894  SN->getMemOperand()->getFlags(), SN->getAAInfo());
10895  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
10896  DAG.getConstant(16, dl, BasePtr.getValueType()));
10897  Stores.push_back(Store);
10898  }
10899  SDValue TF = DAG.getTokenFactor(dl, Stores);
10900  return TF;
10901 }
10902 
// Lower vector multiplies that have no single instruction on the target:
// v4i32 via halfword multiply/multiply-sum intrinsics, and v16i8 via
// even/odd byte multiplies merged back together.
SDValue PPCTargetLowering::LowerMUL(SDValue Op, SelectionDAG &DAG) const {
  SDLoc dl(Op);
  if (Op.getValueType() == MVT::v4i32) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);

    SDValue Zero = getCanonicalConstSplat(0, 1, MVT::v4i32, DAG, dl);
    // +16 as shift amt.
    SDValue Neg16 = getCanonicalConstSplat(-16, 4, MVT::v4i32, DAG, dl);
    SDValue RHSSwap =   // = vrlw RHS, 16
      BuildIntrinsicOp(Intrinsic::ppc_altivec_vrlw, RHS, Neg16, DAG, dl);

    // Shrinkify inputs to v8i16.
    LHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, LHS);
    RHS = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHS);
    RHSSwap = DAG.getNode(ISD::BITCAST, dl, MVT::v8i16, RHSSwap);

    // Low parts multiplied together, generating 32-bit results (we ignore the
    // top parts).
    SDValue LoProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmulouh,
                                        LHS, RHS, DAG, dl, MVT::v4i32);

    // vmsumuhm with the rotated RHS accumulates the cross products that
    // contribute to the high halfwords of each 32-bit lane.
    SDValue HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmsumuhm,
                                      LHS, RHSSwap, Zero, DAG, dl, MVT::v4i32);
    // Shift the high parts up 16 bits.
    HiProd = BuildIntrinsicOp(Intrinsic::ppc_altivec_vslw, HiProd,
                              Neg16, DAG, dl);
    return DAG.getNode(ISD::ADD, dl, MVT::v4i32, LoProd, HiProd);
  } else if (Op.getValueType() == MVT::v16i8) {
    SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1);
    bool isLittleEndian = Subtarget.isLittleEndian();

    // Multiply the even 8-bit parts, producing 16-bit sums.
    SDValue EvenParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuleub,
                                           LHS, RHS, DAG, dl, MVT::v8i16);
    EvenParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, EvenParts);

    // Multiply the odd 8-bit parts, producing 16-bit sums.
    SDValue OddParts = BuildIntrinsicOp(Intrinsic::ppc_altivec_vmuloub,
                                          LHS, RHS, DAG, dl, MVT::v8i16);
    OddParts = DAG.getNode(ISD::BITCAST, dl, MVT::v16i8, OddParts);

    // Merge the results together.  Because vmuleub and vmuloub are
    // instructions with a big-endian bias, we must reverse the
    // element numbering and reverse the meaning of "odd" and "even"
    // when generating little endian code.
    int Ops[16];
    for (unsigned i = 0; i != 8; ++i) {
      if (isLittleEndian) {
        Ops[i*2  ] = 2*i;     // keep the low byte of each 16-bit product
        Ops[i*2+1] = 2*i+16;
      } else {
        Ops[i*2  ] = 2*i+1;
        Ops[i*2+1] = 2*i+1+16;
      }
    }
    if (isLittleEndian)
      return DAG.getVectorShuffle(MVT::v16i8, dl, OddParts, EvenParts, Ops);
    else
      return DAG.getVectorShuffle(MVT::v16i8, dl, EvenParts, OddParts, Ops);
  } else {
    llvm_unreachable("Unknown mul to lower!");
  }
}
10966 
10967 SDValue PPCTargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
10968  bool IsStrict = Op->isStrictFPOpcode();
10969  if (Op.getOperand(IsStrict ? 1 : 0).getValueType() == MVT::f128 &&
10970  !Subtarget.hasP9Vector())
10971  return SDValue();
10972 
10973  return Op;
10974 }
10975 
10976 // Custom lowering for fpext vf32 to v2f64
10977 SDValue PPCTargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
10978 
10979  assert(Op.getOpcode() == ISD::FP_EXTEND &&
10980  "Should only be called for ISD::FP_EXTEND");
10981 
10982  // FIXME: handle extends from half precision float vectors on P9.
10983  // We only want to custom lower an extend from v2f32 to v2f64.
10984  if (Op.getValueType() != MVT::v2f64 ||
10985  Op.getOperand(0).getValueType() != MVT::v2f32)
10986  return SDValue();
10987 
10988  SDLoc dl(Op);
10989  SDValue Op0 = Op.getOperand(0);
10990 
10991  switch (Op0.getOpcode()) {
10992  default:
10993  return SDValue();
10994  case ISD::EXTRACT_SUBVECTOR: {
10995  assert(Op0.getNumOperands() == 2 &&
10996  isa<ConstantSDNode>(Op0->getOperand(1)) &&
10997  "Node should have 2 operands with second one being a constant!");
10998 
10999  if (Op0.getOperand(0).getValueType() != MVT::v4f32)
11000  return SDValue();
11001 
11002  // Custom lower is only done for high or low doubleword.
11003  int Idx = cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue();
11004  if (Idx % 2 != 0)
11005  return SDValue();
11006 
11007  // Since input is v4f32, at this point Idx is either 0 or 2.
11008  // Shift to get the doubleword position we want.
11009  int DWord = Idx >> 1;
11010 
11011  // High and low word positions are different on little endian.
11012  if (Subtarget.isLittleEndian())
11013  DWord ^= 0x1;
11014 
11015  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64,
11016  Op0.getOperand(0), DAG.getConstant(DWord, dl, MVT::i32));
11017  }
11018  case ISD::FADD:
11019  case ISD::FMUL:
11020  case ISD::FSUB: {
11021  SDValue NewLoad[2];
11022  for (unsigned i = 0, ie = Op0.getNumOperands(); i != ie; ++i) {
11023  // Ensure both input are loads.
11024  SDValue LdOp = Op0.getOperand(i);
11025  if (LdOp.getOpcode() != ISD::LOAD)
11026  return SDValue();
11027  // Generate new load node.
11028  LoadSDNode *LD = cast<LoadSDNode>(LdOp);
11029  SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11030  NewLoad[i] = DAG.getMemIntrinsicNode(
11031  PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11032  LD->getMemoryVT(), LD->getMemOperand());
11033  }
11034  SDValue NewOp =
11035  DAG.getNode(Op0.getOpcode(), SDLoc(Op0), MVT::v4f32, NewLoad[0],
11036  NewLoad[1], Op0.getNode()->getFlags());
11037  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewOp,
11038  DAG.getConstant(0, dl, MVT::i32));
11039  }
11040  case ISD::LOAD: {
11041  LoadSDNode *LD = cast<LoadSDNode>(Op0);
11042  SDValue LoadOps[] = {LD->getChain(), LD->getBasePtr()};
11043  SDValue NewLd = DAG.getMemIntrinsicNode(
11044  PPCISD::LD_VSX_LH, dl, DAG.getVTList(MVT::v4f32, MVT::Other), LoadOps,
11045  LD->getMemoryVT(), LD->getMemOperand());
11046  return DAG.getNode(PPCISD::FP_EXTEND_HALF, dl, MVT::v2f64, NewLd,
11047  DAG.getConstant(0, dl, MVT::i32));
11048  }
11049  }
11050  llvm_unreachable("ERROR:Should return for all cases within swtich.");
11051 }
11052 
11053 /// LowerOperation - Provide custom lowering hooks for some operations.
11054 ///
11056  switch (Op.getOpcode()) {
11057  default: llvm_unreachable("Wasn't expecting to be able to lower this!");
11058  case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
11059  case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
11060  case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
11061  case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
11062  case ISD::JumpTable: return LowerJumpTable(Op, DAG);
11063  case ISD::STRICT_FSETCC:
11064  case ISD::STRICT_FSETCCS:
11065  case ISD::SETCC: return LowerSETCC(Op, DAG);
11066  case ISD::INIT_TRAMPOLINE: return LowerINIT_TRAMPOLINE(Op, DAG);
11067  case ISD::ADJUST_TRAMPOLINE: return LowerADJUST_TRAMPOLINE(Op, DAG);
11068 
11069  case ISD::INLINEASM:
11070  case ISD::INLINEASM_BR: return LowerINLINEASM(Op, DAG);
11071  // Variable argument lowering.
11072  case ISD::VASTART: return LowerVASTART(Op, DAG);
11073  case ISD::VAARG: return LowerVAARG(Op, DAG);
11074  case ISD::VACOPY: return LowerVACOPY(Op, DAG);
11075 
11076  case ISD::STACKRESTORE: return LowerSTACKRESTORE(Op, DAG);
11077  case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
11079  return LowerGET_DYNAMIC_AREA_OFFSET(Op, DAG);
11080 
11081  // Exception handling lowering.
11082  case ISD::EH_DWARF_CFA: return LowerEH_DWARF_CFA(Op, DAG);
11083  case ISD::EH_SJLJ_SETJMP: return lowerEH_SJLJ_SETJMP(Op, DAG);
11084  case ISD::EH_SJLJ_LONGJMP: return lowerEH_SJLJ_LONGJMP(Op, DAG);
11085 
11086  case ISD::LOAD: return LowerLOAD(Op, DAG);
11087  case ISD::STORE: return LowerSTORE(Op, DAG);
11088  case ISD::TRUNCATE: return LowerTRUNCATE(Op, DAG);
11089  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
11092  case ISD::FP_TO_UINT:
11093  case ISD::FP_TO_SINT: return LowerFP_TO_INT(Op, DAG, SDLoc(Op));
11096  case ISD::UINT_TO_FP:
11097  case ISD::SINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
11098  case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
11099 
11100  // Lower 64-bit shifts.
11101  case ISD::SHL_PARTS: return LowerSHL_PARTS(Op, DAG);
11102  case ISD::SRL_PARTS: return LowerSRL_PARTS(Op, DAG);
11103  case ISD::SRA_PARTS: return LowerSRA_PARTS(Op, DAG);
11104 
11105  case ISD::FSHL: return LowerFunnelShift(Op, DAG);
11106  case ISD::FSHR: return LowerFunnelShift(Op, DAG);
11107 
11108  // Vector-related lowering.
11109  case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
11110  case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
11111  case ISD::INTRINSIC_WO_CHAIN: return LowerINTRINSIC_WO_CHAIN(Op, DAG);
11112  case ISD::SCALAR_TO_VECTOR: return LowerSCALAR_TO_VECTOR(Op, DAG);
11113  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
11114  case ISD::MUL: return LowerMUL(Op, DAG);
11115  case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG);
11116  case ISD::STRICT_FP_ROUND:
11117  case ISD::FP_ROUND:
11118  return LowerFP_ROUND(Op, DAG);
11119  case ISD::ROTL: return LowerROTL(Op, DAG);
11120 
11121  // For counter-based loop handling.
11122  case ISD::INTRINSIC_W_CHAIN: return SDValue();
11123 
11124  case ISD::BITCAST: return LowerBITCAST(Op, DAG);
11125 
11126  // Frame & Return address.
11127  case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
11128  case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
11129 
11130  case ISD::INTRINSIC_VOID:
11131  return LowerINTRINSIC_VOID(Op, DAG);
11132  case ISD::BSWAP:
11133  return LowerBSWAP(Op, DAG);
11134  case ISD::ATOMIC_CMP_SWAP:
11135  return LowerATOMIC_CMP_SWAP(Op, DAG);
11136  case ISD::ATOMIC_STORE:
11137  return LowerATOMIC_LOAD_STORE(Op, DAG);
11138  }
11139 }
11140 
11143  SelectionDAG &DAG) const {
11144  SDLoc dl(N);
11145  switch (N->getOpcode()) {
11146  default:
11147  llvm_unreachable("Do not know how to custom type legalize this operation!");
11148  case ISD::ATOMIC_LOAD: {
11149  SDValue Res = LowerATOMIC_LOAD_STORE(SDValue(N, 0), DAG);
11150  Results.push_back(Res);
11151  Results.push_back(Res.getValue(1));
11152  break;
11153  }
11154  case ISD::READCYCLECOUNTER: {
11156  SDValue RTB = DAG.getNode(PPCISD::READ_TIME_BASE, dl, VTs, N->getOperand(0));
11157 
11158  Results.push_back(
11159  DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, RTB, RTB.getValue(1)));
11160  Results.push_back(RTB.getValue(2));
11161  break;
11162  }
11163  case ISD::INTRINSIC_W_CHAIN: {
11164  if (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue() !=
11165  Intrinsic::loop_decrement)
11166  break;
11167 
11168  assert(N->getValueType(0) == MVT::i1 &&
11169  "Unexpected result type for CTR decrement intrinsic");
11170  EVT SVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
11171  N->getValueType(0));
11172  SDVTList VTs = DAG.getVTList(SVT, MVT::Other);
11173  SDValue NewInt = DAG.getNode(N->getOpcode(), dl, VTs, N->getOperand(0),
11174  N->getOperand(1));
11175 
11176  Results.push_back(DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, NewInt));
11177  Results.push_back(NewInt.getValue(1));
11178  break;
11179  }
11180  case ISD::INTRINSIC_WO_CHAIN: {
11181  switch (cast<ConstantSDNode>(N->getOperand(0))->getZExtValue()) {
11182  case Intrinsic::ppc_pack_longdouble:
11183  Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::ppcf128,
11184  N->getOperand(2), N->getOperand(1)));
11185  break;
11186  case Intrinsic::ppc_convert_f128_to_ppcf128:
11187  Results.push_back(LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), DAG));
11188  break;
11189  }
11190  break;
11191  }
11192  case ISD::VAARG: {
11193  if (!Subtarget.isSVR4ABI() || Subtarget.isPPC64())
11194  return;
11195 
11196  EVT VT = N->getValueType(0);
11197 
11198  if (VT == MVT::i64) {
11199  SDValue NewNode = LowerVAARG(SDValue(N, 1), DAG);
11200 
11201  Results.push_back(NewNode);
11202  Results.push_back(NewNode.getValue(1));
11203  }
11204  return;
11205  }
11208  case ISD::FP_TO_SINT:
11209  case ISD::FP_TO_UINT: {
11210  // LowerFP_TO_INT() can only handle f32 and f64.
11211  if (N->getOperand(N->isStrictFPOpcode() ? 1 : 0).getValueType() ==
11212  MVT::ppcf128)
11213  return;
11214  SDValue LoweredValue = LowerFP_TO_INT(SDValue(N, 0), DAG, dl);
11215  Results.push_back(LoweredValue);
11216  if (N->isStrictFPOpcode())
11217  Results.push_back(LoweredValue.getValue(1));
11218  return;
11219  }
11220  case ISD::TRUNCATE: {
11221  if (!N->getValueType(0).isVector())
11222  return;
11223  SDValue Lowered = LowerTRUNCATEVector(SDValue(N, 0), DAG);
11224  if (Lowered)
11225  Results.push_back(Lowered);
11226  return;
11227  }
11228  case ISD::FSHL:
11229  case ISD::FSHR:
11230  // Don't handle funnel shifts here.
11231  return;
11232  case ISD::BITCAST:
11233  // Don't handle bitcast here.
11234  return;
11235  case ISD::FP_EXTEND:
11236  SDValue Lowered = LowerFP_EXTEND(SDValue(N, 0), DAG);
11237  if (Lowered)
11238  Results.push_back(Lowered);
11239  return;
11240  }
11241 }
11242 
11243 //===----------------------------------------------------------------------===//
11244 // Other Lowering Code
11245 //===----------------------------------------------------------------------===//
11246 
11248  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
11250  return Builder.CreateCall(Func, {});
11251 }
11252 
11253 // The mappings for emitLeading/TrailingFence is taken from
11254 // http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
11256  Instruction *Inst,
11257  AtomicOrdering Ord) const {
11259  return callIntrinsic(Builder, Intrinsic::ppc_sync);
11260  if (isReleaseOrStronger(Ord))
11261  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11262  return nullptr;
11263 }
11264 
11266  Instruction *Inst,
11267  AtomicOrdering Ord) const {
11268  if (Inst->hasAtomicLoad() && isAcquireOrStronger(Ord)) {
11269  // See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html and
11270  // http://www.rdrop.com/users/paulmck/scalability/paper/N2745r.2011.03.04a.html
11271  // and http://www.cl.cam.ac.uk/~pes20/cppppc/ for justification.
11272  if (isa<LoadInst>(Inst) && Subtarget.isPPC64())
11273  return Builder.CreateCall(
11275  Builder.GetInsertBlock()->getParent()->getParent(),
11276  Intrinsic::ppc_cfence, {Inst->getType()}),
11277  {Inst});
11278  // FIXME: Can use isync for rmw operation.
11279  return callIntrinsic(Builder, Intrinsic::ppc_lwsync);
11280  }
11281  return nullptr;
11282 }
11283 
11286  unsigned AtomicSize,
11287  unsigned BinOpcode,
11288  unsigned CmpOpcode,
11289  unsigned CmpPred) const {
11290  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11291  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11292 
11293  auto LoadMnemonic = PPC::LDARX;
11294  auto StoreMnemonic = PPC::STDCX;
11295  switch (AtomicSize) {
11296  default:
11297  llvm_unreachable("Unexpected size of atomic entity");
11298  case 1:
11299  LoadMnemonic = PPC::LBARX;
11300  StoreMnemonic = PPC::STBCX;
11301  assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11302  break;
11303  case 2:
11304  LoadMnemonic = PPC::LHARX;
11305  StoreMnemonic = PPC::STHCX;
11306  assert(Subtarget.hasPartwordAtomics() && "Call this only with size >=4");
11307  break;
11308  case 4:
11309  LoadMnemonic = PPC::LWARX;
11310  StoreMnemonic = PPC::STWCX;
11311  break;
11312  case 8:
11313  LoadMnemonic = PPC::LDARX;
11314  StoreMnemonic = PPC::STDCX;
11315  break;
11316  }
11317 
11318  const BasicBlock *LLVM_BB = BB->getBasicBlock();
11319  MachineFunction *F = BB->getParent();
11320  MachineFunction::iterator It = ++BB->getIterator();
11321 
11322  Register dest = MI.getOperand(0).getReg();
11323  Register ptrA = MI.getOperand(1).getReg();
11324  Register ptrB = MI.getOperand(2).getReg();
11325  Register incr = MI.getOperand(3).getReg();
11326  DebugLoc dl = MI.getDebugLoc();
11327 
11328  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11329  MachineBasicBlock *loop2MBB =
11330  CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11331  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11332  F->insert(It, loopMBB);
11333  if (CmpOpcode)
11334  F->insert(It, loop2MBB);
11335  F->insert(It, exitMBB);
11336  exitMBB->splice(exitMBB->begin(), BB,
11337  std::next(MachineBasicBlock::iterator(MI)), BB->end());
11339 
11340  MachineRegisterInfo &RegInfo = F->getRegInfo();
11341  Register TmpReg = (!BinOpcode) ? incr :
11342  RegInfo.createVirtualRegister( AtomicSize == 8 ? &PPC::G8RCRegClass
11343  : &PPC::GPRCRegClass);
11344 
11345  // thisMBB:
11346  // ...
11347  // fallthrough --> loopMBB
11348  BB->addSuccessor(loopMBB);
11349 
11350  // loopMBB:
11351  // l[wd]arx dest, ptr
11352  // add r0, dest, incr
11353  // st[wd]cx. r0, ptr
11354  // bne- loopMBB
11355  // fallthrough --> exitMBB
11356 
11357  // For max/min...
11358  // loopMBB:
11359  // l[wd]arx dest, ptr
11360  // cmpl?[wd] incr, dest
11361  // bgt exitMBB
11362  // loop2MBB:
11363  // st[wd]cx. dest, ptr
11364  // bne- loopMBB
11365  // fallthrough --> exitMBB
11366 
11367  BB = loopMBB;
11368  BuildMI(BB, dl, TII->get(LoadMnemonic), dest)
11369  .addReg(ptrA).addReg(ptrB);
11370  if (BinOpcode)
11371  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg).addReg(incr).addReg(dest);
11372  if (CmpOpcode) {
11373  // Signed comparisons of byte or halfword values must be sign-extended.
11374  if (CmpOpcode == PPC::CMPW && AtomicSize < 4) {
11375  Register ExtReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11376  BuildMI(BB, dl, TII->get(AtomicSize == 1 ? PPC::EXTSB : PPC::EXTSH),
11377  ExtReg).addReg(dest);
11378  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11379  .addReg(incr).addReg(ExtReg);
11380  } else
11381  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11382  .addReg(incr).addReg(dest);
11383 
11384  BuildMI(BB, dl, TII->get(PPC::BCC))
11385  .addImm(CmpPred).addReg(PPC::CR0).addMBB(exitMBB);
11386  BB->addSuccessor(loop2MBB);
11387  BB->addSuccessor(exitMBB);
11388  BB = loop2MBB;
11389  }
11390  BuildMI(BB, dl, TII->get(StoreMnemonic))
11391  .addReg(TmpReg).addReg(ptrA).addReg(ptrB);
11392  BuildMI(BB, dl, TII->get(PPC::BCC))
11393  .addImm(PPC::PRED_NE).addReg(PPC::CR0).addMBB(loopMBB);
11394  BB->addSuccessor(loopMBB);
11395  BB->addSuccessor(exitMBB);
11396 
11397  // exitMBB:
11398  // ...
11399  BB = exitMBB;
11400  return BB;
11401 }
11402 
// Return true if \p MI is an instruction whose result is known to already be
// sign-extended in its destination register: sign-extending loads
// (LHA*/LWA*/PLHA*/PLWA* families), the EXTSB/EXTSH/EXTSW extensions
// (including record-form "_rec" and 32<->64 variants), EXTSWSLI, and
// arithmetic right shifts (SRAW/SRAWI and record forms). For COPY the
// decision is delegated to PPCInstrInfo::isSignExtended on the copy itself.
// NOTE(review): the function signature line is not visible in this view —
// verify parameter types (MachineInstr &, const PPCInstrInfo *) upstream.
11404  switch(MI.getOpcode()) {
11405  default:
11406  return false;
11407  case PPC::COPY:
11408  return TII->isSignExtended(MI);
11409  case PPC::LHA:
11410  case PPC::LHA8:
11411  case PPC::LHAU:
11412  case PPC::LHAU8:
11413  case PPC::LHAUX:
11414  case PPC::LHAUX8:
11415  case PPC::LHAX:
11416  case PPC::LHAX8:
11417  case PPC::LWA:
11418  case PPC::LWAUX:
11419  case PPC::LWAX:
11420  case PPC::LWAX_32:
11421  case PPC::LWA_32:
11422  case PPC::PLHA:
11423  case PPC::PLHA8:
11424  case PPC::PLHA8pc:
11425  case PPC::PLHApc:
11426  case PPC::PLWA:
11427  case PPC::PLWA8:
11428  case PPC::PLWA8pc:
11429  case PPC::PLWApc:
11430  case PPC::EXTSB:
11431  case PPC::EXTSB8:
11432  case PPC::EXTSB8_32_64:
11433  case PPC::EXTSB8_rec:
11434  case PPC::EXTSB_rec:
11435  case PPC::EXTSH:
11436  case PPC::EXTSH8:
11437  case PPC::EXTSH8_32_64:
11438  case PPC::EXTSH8_rec:
11439  case PPC::EXTSH_rec:
11440  case PPC::EXTSW:
11441  case PPC::EXTSWSLI:
11442  case PPC::EXTSWSLI_32_64:
11443  case PPC::EXTSWSLI_32_64_rec:
11444  case PPC::EXTSWSLI_rec:
11445  case PPC::EXTSW_32:
11446  case PPC::EXTSW_32_64:
11447  case PPC::EXTSW_32_64_rec:
11448  case PPC::EXTSW_rec:
11449  case PPC::SRAW:
11450  case PPC::SRAWI:
11451  case PPC::SRAWI_rec:
11452  case PPC::SRAW_rec:
11453  return true;
11454  }
       // Unreachable: every case in the switch above returns.
11455  return false;
11456 }
11457 
// Expand an 8- or 16-bit atomic read-modify-write (and, when CmpOpcode is
// nonzero, a compare-and-branch min/max variant selected by CmpPred). If the
// subtarget has partword atomic instructions this simply forwards to
// EmitAtomicBinary; otherwise it builds a word-aligned lwarx/stwcx. loop that
// masks and updates only the addressed byte/halfword lane.
// NOTE(review): the opening line(s) of the signature are not visible in this
// view — the visible parameters are (is8bit, BinOpcode, CmpOpcode, CmpPred).
11460  bool is8bit, // operation
11461  unsigned BinOpcode, unsigned CmpOpcode, unsigned CmpPred) const {
11462  // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
11463  const PPCInstrInfo *TII = Subtarget.getInstrInfo();
11464 
11465  // If this is a signed comparison and the value being compared is not known
11466  // to be sign extended, sign extend it here.
11467  DebugLoc dl = MI.getDebugLoc();
11468  MachineFunction *F = BB->getParent();
11469  MachineRegisterInfo &RegInfo = F->getRegInfo();
       // Operand 3 is the increment / comparison value.
11470  Register incr = MI.getOperand(3).getReg();
11471  bool IsSignExtended = Register::isVirtualRegister(incr) &&
11472  isSignExtended(*RegInfo.getVRegDef(incr), TII);
11473 
       // Signed min/max (CMPW) needs a sign-extended comparison value; insert
       // EXTSB/EXTSH before the atomic pseudo and rewrite its operand.
11474  if (CmpOpcode == PPC::CMPW && !IsSignExtended) {
11475  Register ValueReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
11476  BuildMI(*BB, MI, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueReg)
11477  .addReg(MI.getOperand(3).getReg());
11478  MI.getOperand(3).setReg(ValueReg);
11479  }
11480  // If we support part-word atomic mnemonics, just use them
11481  if (Subtarget.hasPartwordAtomics())
11482  return EmitAtomicBinary(MI, BB, is8bit ? 1 : 2, BinOpcode, CmpOpcode,
11483  CmpPred);
11484 
11485  // In 64 bit mode we have to use 64 bits for addresses, even though the
11486  // lwarx/stwcx are 32 bits. With the 32-bit atomics we can use address
11487  // registers without caring whether they're 32 or 64, but here we're
11488  // doing actual arithmetic on the addresses.
11489  bool is64bit = Subtarget.isPPC64();
11490  bool isLittleEndian = Subtarget.isLittleEndian();
11491  unsigned ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
11492 
11493  const BasicBlock *LLVM_BB = BB->getBasicBlock();
11494  MachineFunction::iterator It = ++BB->getIterator();
11495 
11496  Register dest = MI.getOperand(0).getReg();
11497  Register ptrA = MI.getOperand(1).getReg();
11498  Register ptrB = MI.getOperand(2).getReg();
11499 
       // loop2MBB only exists for the min/max (compare-and-branch) form.
11500  MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
11501  MachineBasicBlock *loop2MBB =
11502  CmpOpcode ? F->CreateMachineBasicBlock(LLVM_BB) : nullptr;
11503  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
11504  F->insert(It, loopMBB);
11505  if (CmpOpcode)
11506  F->insert(It, loop2MBB);
11507  F->insert(It, exitMBB);
       // Move everything after the pseudo into exitMBB.
11508  exitMBB->splice(exitMBB->begin(), BB,
11509  std::next(MachineBasicBlock::iterator(MI)), BB->end());
11511 
11512  const TargetRegisterClass *RC =
11513  is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11514  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11515 
11516  Register PtrReg = RegInfo.createVirtualRegister(RC);
11517  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
       // On little-endian the lane shift is Shift1Reg directly; big-endian
       // needs the extra XORI below, so a distinct register is created.
11518  Register ShiftReg =
11519  isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
11520  Register Incr2Reg = RegInfo.createVirtualRegister(GPRC);
11521  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
11522  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
11523  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
11524  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
11525  Register Tmp3Reg = RegInfo.createVirtualRegister(GPRC);
11526  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
11527  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
11528  Register SrwDestReg = RegInfo.createVirtualRegister(GPRC);
       // Ptr1Reg is assigned in exactly one of the two branches below.
11529  Register Ptr1Reg;
       // With no binop (ATOMIC_SWAP), the stored value is just the shifted
       // increment, so reuse Incr2Reg instead of a fresh register.
11530  Register TmpReg =
11531  (!BinOpcode) ? Incr2Reg : RegInfo.createVirtualRegister(GPRC);
11532 
11533  // thisMBB:
11534  // ...
11535  // fallthrough --> loopMBB
11536  BB->addSuccessor(loopMBB);
11537 
11538  // The 4-byte load must be aligned, while a char or short may be
11539  // anywhere in the word. Hence all this nasty bookkeeping code.
11540  // add ptr1, ptrA, ptrB [copy if ptrA==0]
11541  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
11542  // xori shift, shift1, 24 [16]
11543  // rlwinm ptr, ptr1, 0, 0, 29
11544  // slw incr2, incr, shift
11545  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
11546  // slw mask, mask2, shift
11547  // loopMBB:
11548  // lwarx tmpDest, ptr
11549  // add tmp, tmpDest, incr2
11550  // andc tmp2, tmpDest, mask
11551  // and tmp3, tmp, mask
11552  // or tmp4, tmp3, tmp2
11553  // stwcx. tmp4, ptr
11554  // bne- loopMBB
11555  // fallthrough --> exitMBB
11556  // srw SrwDest, tmpDest, shift
11557  // rlwinm SrwDest, SrwDest, 0, 24 [16], 31
11558  if (ptrA != ZeroReg) {
11559  Ptr1Reg = RegInfo.createVirtualRegister(RC);
11560  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
11561  .addReg(ptrA)
11562  .addReg(ptrB);
11563  } else {
11564  Ptr1Reg = ptrB;
11565  }
11566  // We need to use the 32-bit subregister to avoid a register-class
11567  // mismatch in 64-bit mode.
11568  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
11569  .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
11570  .addImm(3)
11571  .addImm(27)
11572  .addImm(is8bit ? 28 : 27)
       // Big-endian: convert the byte index into a shift from the other end
       // of the word (24 - 8*idx for bytes, 16 - 16*idx for halfwords).
11573  if (!isLittleEndian)
11574  BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
11575  .addReg(Shift1Reg)
11576  .addImm(is8bit ? 24 : 16);
       // Clear the low 2 address bits to get the aligned word address.
11577  if (is64bit)
11578  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
11579  .addReg(Ptr1Reg)
11580  .addImm(0)
11581  .addImm(61);
11582  else
11583  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
11584  .addReg(Ptr1Reg)
11585  .addImm(0)
11586  .addImm(0)
11587  .addImm(29);
11588  BuildMI(BB, dl, TII->get(PPC::SLW), Incr2Reg).addReg(incr).addReg(ShiftReg);
11589  if (is8bit)
11590  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
11591  else {
       // 65535 does not fit in LI's signed 16-bit immediate; build it with
       // li 0 + ori.
11592  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
11593  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
11594  .addReg(Mask3Reg)
11595  .addImm(65535);
11596  }
11597  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
11598  .addReg(Mask2Reg)
11599  .addReg(ShiftReg);
11600 
11601  BB = loopMBB;
11602  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
11603  .addReg(ZeroReg)
11604  .addReg(PtrReg);
11605  if (BinOpcode)
11606  BuildMI(BB, dl, TII->get(BinOpcode), TmpReg)
11607  .addReg(Incr2Reg)
11608  .addReg(TmpDestReg);
       // tmp2 = word with the target lane cleared; tmp3 = new lane value.
11609  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
11610  .addReg(TmpDestReg)
11611  .addReg(MaskReg);
11612  BuildMI(BB, dl, TII->get(PPC::AND), Tmp3Reg).addReg(TmpReg).addReg(MaskReg);
11613  if (CmpOpcode) {
11614  // For unsigned comparisons, we can directly compare the shifted values.
11615  // For signed comparisons we shift and sign extend.
11616  Register SReg = RegInfo.createVirtualRegister(GPRC);
11617  BuildMI(BB, dl, TII->get(PPC::AND), SReg)
11618  .addReg(TmpDestReg)
11619  .addReg(MaskReg);
11620  unsigned ValueReg = SReg;
11621  unsigned CmpReg = Incr2Reg;
11622  if (CmpOpcode == PPC::CMPW) {
11623  ValueReg = RegInfo.createVirtualRegister(GPRC);
11624  BuildMI(BB, dl, TII->get(PPC::SRW), ValueReg)
11625  .addReg(SReg)
11626  .addReg(ShiftReg);
11627  Register ValueSReg = RegInfo.createVirtualRegister(GPRC);
11628  BuildMI(BB, dl, TII->get(is8bit ? PPC::EXTSB : PPC::EXTSH), ValueSReg)
11629  .addReg(ValueReg);
11630  ValueReg = ValueSReg;
       // Signed compare uses the original (sign-extended) incr, not the
       // shifted Incr2Reg.
11631  CmpReg = incr;
11632  }
11633  BuildMI(BB, dl, TII->get(CmpOpcode), PPC::CR0)
11634  .addReg(CmpReg)
11635  .addReg(ValueReg)
       // If the current value already satisfies the min/max predicate,
       // skip the store and exit.
11636  BuildMI(BB, dl, TII->get(PPC::BCC))
11637  .addImm(CmpPred)
11638  .addReg(PPC::CR0)
11639  .addMBB(exitMBB);
11640  BB->addSuccessor(loop2MBB);
11641  BB->addSuccessor(exitMBB);
11642  BB = loop2MBB;
11643  }
11644  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg).addReg(Tmp3Reg).addReg(Tmp2Reg);
11645  BuildMI(BB, dl, TII->get(PPC::STWCX))
11646  .addReg(Tmp4Reg)
11647  .addReg(ZeroReg)
11648  .addReg(PtrReg);
       // Retry the loop if the store-conditional lost the reservation.
       // NOTE(review): the predicate operand of this BCC (expected
       // PRED_NE) is not visible in this view — verify upstream.
11649  BuildMI(BB, dl, TII->get(PPC::BCC))
11651  .addReg(PPC::CR0)
11652  .addMBB(loopMBB);
11653  BB->addSuccessor(loopMBB);
11654  BB->addSuccessor(exitMBB);
11655 
11656  // exitMBB:
11657  // ...
11658  BB = exitMBB;
11659  // Since the shift amount is not a constant, we need to clear
11660  // the upper bits with a separate RLWINM.
       // Both instructions are inserted at BB->begin(); the SRW (built
       // second) therefore ends up FIRST, so SrwDestReg is defined before
       // the RLWINM that reads it.
11661  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::RLWINM), dest)
11662  .addReg(SrwDestReg)
11663  .addImm(0)
11664  .addImm(is8bit ? 24 : 16)
11665  .addImm(31);
11666  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), SrwDestReg)
11667  .addReg(TmpDestReg)
11668  .addReg(ShiftReg);
11669  return BB;
11670 }
11671 
// Lower the EH_SjLj_SetJmp32/64 pseudo: split MBB into thisMBB / mainMBB /
// sinkMBB, store the TOC pointer (64-bit ELF), base pointer, and the
// return address (LR) into the jmp_buf, and produce v = phi(0 from the
// direct path, 1 from the longjmp-restore path) in sinkMBB.
// NOTE(review): the signature head and a few declaration lines (notably
// `MachineRegisterInfo &MRI = MF->getRegInfo();` and the creation of
// mainMBB/sinkMBB via CreateMachineBasicBlock) are not visible in this
// view — verify upstream.
11674  MachineBasicBlock *MBB) const {
11675  DebugLoc DL = MI.getDebugLoc();
11676  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11677  const PPCRegisterInfo *TRI = Subtarget.getRegisterInfo();
11678 
11679  MachineFunction *MF = MBB->getParent();
11681 
11682  const BasicBlock *BB = MBB->getBasicBlock();
11684 
11685  Register DstReg = MI.getOperand(0).getReg();
11686  const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
11687  assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
       // mainDstReg/restoreDstReg are the two PHI inputs for the result.
11688  Register mainDstReg = MRI.createVirtualRegister(RC);
11689  Register restoreDstReg = MRI.createVirtualRegister(RC);
11690 
11691  MVT PVT = getPointerTy(MF->getDataLayout());
11692  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11693  "Invalid Pointer Size!");
11694  // For v = setjmp(buf), we generate
11695  //
11696  // thisMBB:
11697  // SjLjSetup mainMBB
11698  // bl mainMBB
11699  // v_restore = 1
11700  // b sinkMBB
11701  //
11702  // mainMBB:
11703  // buf[LabelOffset] = LR
11704  // v_main = 0
11705  //
11706  // sinkMBB:
11707  // v = phi(main, restore)
11708  //
11709 
11710  MachineBasicBlock *thisMBB = MBB;
11713  MF->insert(I, mainMBB);
11714  MF->insert(I, sinkMBB);
11715 
11716  MachineInstrBuilder MIB;
11717 
11718  // Transfer the remainder of BB and its successor edges to sinkMBB.
11719  sinkMBB->splice(sinkMBB->begin(), MBB,
11720  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
11722 
11723  // Note that the structure of the jmp_buf used here is not compatible
11724  // with that used by libc, and is not designed to be. Specifically, it
11725  // stores only those 'reserved' registers that LLVM does not otherwise
11726  // understand how to spill. Also, by convention, by the time this
11727  // intrinsic is called, Clang has already stored the frame address in the
11728  // first slot of the buffer and stack address in the third. Following the
11729  // X86 target code, we'll store the jump address in the second slot. We also
11730  // need to save the TOC pointer (R2) to handle jumps between shared
11731  // libraries, and that will be stored in the fourth slot. The thread
11732  // identifier (R13) is not affected.
11733 
11734  // thisMBB:
11735  const int64_t LabelOffset = 1 * PVT.getStoreSize();
11736  const int64_t TOCOffset = 3 * PVT.getStoreSize();
11737  const int64_t BPOffset = 4 * PVT.getStoreSize();
11738 
11739  // Prepare IP either in reg.
11740  const TargetRegisterClass *PtrRC = getRegClassFor(PVT);
11741  Register LabelReg = MRI.createVirtualRegister(PtrRC);
11742  Register BufReg = MI.getOperand(1).getReg();
11743 
       // 64-bit ELF: save the TOC pointer (X2) so cross-shared-library
       // longjmps can restore it.
11744  if (Subtarget.is64BitELFABI()) {
11746  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::STD))
11747  .addReg(PPC::X2)
11748  .addImm(TOCOffset)
11749  .addReg(BufReg)
11750  .cloneMemRefs(MI);
11751  }
11752 
11753  // Naked functions never have a base pointer, and so we use r1. For all
11754  // other functions, this decision must be delayed until during PEI.
11755  unsigned BaseReg;
11756  if (MF->getFunction().hasFnAttribute(Attribute::Naked))
11757  BaseReg = Subtarget.isPPC64() ? PPC::X1 : PPC::R1;
11758  else
11759  BaseReg = Subtarget.isPPC64() ? PPC::BP8 : PPC::BP;
11760 
11761  MIB = BuildMI(*thisMBB, MI, DL,
11762  TII->get(Subtarget.isPPC64() ? PPC::STD : PPC::STW))
11763  .addReg(BaseReg)
11764  .addImm(BPOffset)
11765  .addReg(BufReg)
11766  .cloneMemRefs(MI);
11767 
11768  // Setup
       // BCLalways with the no-preserved regmask: treat every register as
       // clobbered across the setjmp landing point.
11769  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::BCLalways)).addMBB(mainMBB);
11770  MIB.addRegMask(TRI->getNoPreservedMask());
11771 
       // Value produced when control returns here via longjmp.
11772  BuildMI(*thisMBB, MI, DL, TII->get(PPC::LI), restoreDstReg).addImm(1);
11773 
11774  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::EH_SjLj_Setup))
11775  .addMBB(mainMBB);
11776  MIB = BuildMI(*thisMBB, MI, DL, TII->get(PPC::B)).addMBB(sinkMBB);
11777 
11778  thisMBB->addSuccessor(mainMBB, BranchProbability::getZero());
11779  thisMBB->addSuccessor(sinkMBB, BranchProbability::getOne());
11780 
11781  // mainMBB:
11782  // mainDstReg = 0
       // LR now holds the address the BCLalways "returned" to; that is the
       // resume address stored into the buffer.
11783  MIB =
11784  BuildMI(mainMBB, DL,
11785  TII->get(Subtarget.isPPC64() ? PPC::MFLR8 : PPC::MFLR), LabelReg);
11786 
11787  // Store IP
11788  if (Subtarget.isPPC64()) {
11789  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STD))
11790  .addReg(LabelReg)
11791  .addImm(LabelOffset)
11792  .addReg(BufReg);
11793  } else {
11794  MIB = BuildMI(mainMBB, DL, TII->get(PPC::STW))
11795  .addReg(LabelReg)
11796  .addImm(LabelOffset)
11797  .addReg(BufReg);
11798  }
11799  MIB.cloneMemRefs(MI);
11800 
       // Value produced on the ordinary (direct) return from setjmp.
11801  BuildMI(mainMBB, DL, TII->get(PPC::LI), mainDstReg).addImm(0);
11802  mainMBB->addSuccessor(sinkMBB);
11803 
11804  // sinkMBB:
11805  BuildMI(*sinkMBB, sinkMBB->begin(), DL,
11806  TII->get(PPC::PHI), DstReg)
11807  .addReg(mainDstReg).addMBB(mainMBB)
11808  .addReg(restoreDstReg).addMBB(thisMBB);
11809 
11810  MI.eraseFromParent();
11811  return sinkMBB;
11812 }
11813 
// Lower the EH_SjLj_LongJmp32/64 pseudo: reload FP, the jump address, SP,
// BP, and (on 64-bit SVR4) the TOC pointer from the jmp_buf, then transfer
// control via mtctr + bctr. Buffer slot layout matches emitEHSjLjSetJmp:
// [0]=FP, [1]=IP, [2]=SP, [3]=TOC, [4]=BP (in pointer-size units).
// NOTE(review): the signature head and the declaration of MRI
// (MF->getRegInfo()) are not visible in this view — verify upstream.
11816  MachineBasicBlock *MBB) const {
11817  DebugLoc DL = MI.getDebugLoc();
11818  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11819 
11820  MachineFunction *MF = MBB->getParent();
11822 
11823  MVT PVT = getPointerTy(MF->getDataLayout());
11824  assert((PVT == MVT::i64 || PVT == MVT::i32) &&
11825  "Invalid Pointer Size!");
11826 
11827  const TargetRegisterClass *RC =
11828  (PVT == MVT::i64) ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
11829  Register Tmp = MRI.createVirtualRegister(RC);
11830  // Since FP is only updated here but NOT referenced, it's treated as GPR.
11831  unsigned FP = (PVT == MVT::i64) ? PPC::X31 : PPC::R31;
11832  unsigned SP = (PVT == MVT::i64) ? PPC::X1 : PPC::R1;
11833  unsigned BP =
11834  (PVT == MVT::i64)
11835  ? PPC::X30
11836  : (Subtarget.isSVR4ABI() && isPositionIndependent() ? PPC::R29
11837  : PPC::R30);
11838 
11839  MachineInstrBuilder MIB;
11840 
11841  const int64_t LabelOffset = 1 * PVT.getStoreSize();
11842  const int64_t SPOffset = 2 * PVT.getStoreSize();
11843  const int64_t TOCOffset = 3 * PVT.getStoreSize();
11844  const int64_t BPOffset = 4 * PVT.getStoreSize();
11845 
11846  Register BufReg = MI.getOperand(0).getReg();
11847 
11848  // Reload FP (the jumped-to function may not have had a
11849  // frame pointer, and if so, then its r31 will be restored
11850  // as necessary).
11851  if (PVT == MVT::i64) {
11852  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), FP)
11853  .addImm(0)
11854  .addReg(BufReg);
11855  } else {
11856  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), FP)
11857  .addImm(0)
11858  .addReg(BufReg);
11859  }
11860  MIB.cloneMemRefs(MI);
11861 
11862  // Reload IP
11863  if (PVT == MVT::i64) {
11864  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), Tmp)
11865  .addImm(LabelOffset)
11866  .addReg(BufReg);
11867  } else {
11868  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), Tmp)
11869  .addImm(LabelOffset)
11870  .addReg(BufReg);
11871  }
11872  MIB.cloneMemRefs(MI);
11873 
11874  // Reload SP
11875  if (PVT == MVT::i64) {
11876  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), SP)
11877  .addImm(SPOffset)
11878  .addReg(BufReg);
11879  } else {
11880  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), SP)
11881  .addImm(SPOffset)
11882  .addReg(BufReg);
11883  }
11884  MIB.cloneMemRefs(MI);
11885 
11886  // Reload BP
11887  if (PVT == MVT::i64) {
11888  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), BP)
11889  .addImm(BPOffset)
11890  .addReg(BufReg);
11891  } else {
11892  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LWZ), BP)
11893  .addImm(BPOffset)
11894  .addReg(BufReg);
11895  }
11896  MIB.cloneMemRefs(MI);
11897 
11898  // Reload TOC
11899  if (PVT == MVT::i64 && Subtarget.isSVR4ABI()) {
11901  MIB = BuildMI(*MBB, MI, DL, TII->get(PPC::LD), PPC::X2)
11902  .addImm(TOCOffset)
11903  .addReg(BufReg)
11904  .cloneMemRefs(MI);
11905  }
11906 
11907  // Jump
11908  BuildMI(*MBB, MI, DL,
11909  TII->get(PVT == MVT::i64 ? PPC::MTCTR8 : PPC::MTCTR)).addReg(Tmp);
11910  BuildMI(*MBB, MI, DL, TII->get(PVT == MVT::i64 ? PPC::BCTR8 : PPC::BCTR));
11911 
11912  MI.eraseFromParent();
11913  return MBB;
11914 }
11915 
       // Return true only when the function carries a "probe-stack"
       // attribute whose value is exactly "inline-asm"; any other value
       // (or no attribute) means no inline stack probes.
11917  // If the function specifically requests inline stack probes, emit them.
11918  if (MF.getFunction().hasFnAttribute("probe-stack"))
11919  return MF.getFunction().getFnAttribute("probe-stack").getValueAsString() ==
11920  "inline-asm";
11921  return false;
11922 }
11923 
       // Compute the stack-probe interval for \p MF: start from the default
       // 4096, let the "stack-probe-size" function attribute override it,
       // round down to the target stack alignment, and never return 0
       // (fall back to the alignment itself).
       // NOTE(review): the head of the assert(...) whose message is
       // "Unexpected stack alignment" is not visible in this view —
       // verify the asserted condition upstream.
11925  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
11926  unsigned StackAlign = TFI->getStackAlignment();
11928  "Unexpected stack alignment");
11929  // The default stack probe size is 4096 if the function has no
11930  // stack-probe-size attribute.
11931  unsigned StackProbeSize = 4096;
11932  const Function &Fn = MF.getFunction();
11933  if (Fn.hasFnAttribute("stack-probe-size"))
11934  Fn.getFnAttribute("stack-probe-size")
11935  .getValueAsString()
11936  .getAsInteger(0, StackProbeSize);
11937  // Round down to the stack alignment (StackAlign is a power of two here).
11938  StackProbeSize &= ~(StackAlign - 1);
       // A probe size smaller than the alignment rounds to 0; use the
       // alignment itself in that case.
11939  return StackProbeSize ? StackProbeSize : StackAlign;
11940 }
11941 
11942 // Lower dynamic stack allocation with probing. `emitProbedAlloca` is split
11943 // into three phases. In the first phase, it uses pseudo instruction
11944 // PREPARE_PROBED_ALLOCA to get the future result of actual FramePointer and
11945 // FinalStackPtr. In the second phase, it generates a loop for probing blocks.
11946 // At last, it uses pseudo instruction DYNAREAOFFSET to get the future result of
11947 // MaxCallFrameSize so that it can calculate correct data area pointer.
// Lower a probed dynamic alloca: PREPARE_PROBED_ALLOCA yields the future
// FramePointer/ActualNegSize, a residual probe handles the non-multiple
// part, then a TestMBB/BlockMBB loop probes whole ProbeSize blocks, and
// DYNAREAOFFSET in TailMBB produces the final data-area pointer.
// NOTE(review): the signature head and the declaration of MRI
// (MF->getRegInfo()) are not visible in this view — verify upstream.
11950  MachineBasicBlock *MBB) const {
11951  const bool isPPC64 = Subtarget.isPPC64();
11952  MachineFunction *MF = MBB->getParent();
11953  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
11954  DebugLoc DL = MI.getDebugLoc();
11955  const unsigned ProbeSize = getStackProbeSize(*MF);
11956  const BasicBlock *ProbedBB = MBB->getBasicBlock();
11958  // The CFG of probing stack looks as
11959  // +-----+
11960  // | MBB |
11961  // +--+--+
11962  // |
11963  // +----v----+
11964  // +--->+ TestMBB +---+
11965  // | +----+----+ |
11966  // | | |
11967  // | +-----v----+ |
11968  // +---+ BlockMBB | |
11969  // +----------+ |
11970  // |
11971  // +---------+ |
11972  // | TailMBB +<--+
11973  // +---------+
11974  // In MBB, calculate previous frame pointer and final stack pointer.
11975  // In TestMBB, test if sp is equal to final stack pointer, if so, jump to
11976  // TailMBB. In BlockMBB, update the sp atomically and jump back to TestMBB.
11977  // TailMBB is spliced via \p MI.
11978  MachineBasicBlock *TestMBB = MF->CreateMachineBasicBlock(ProbedBB);
11979  MachineBasicBlock *TailMBB = MF->CreateMachineBasicBlock(ProbedBB);
11980  MachineBasicBlock *BlockMBB = MF->CreateMachineBasicBlock(ProbedBB);
11981 
11982  MachineFunction::iterator MBBIter = ++MBB->getIterator();
11983  MF->insert(MBBIter, TestMBB);
11984  MF->insert(MBBIter, BlockMBB);
11985  MF->insert(MBBIter, TailMBB);
11986 
11987  const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
11988  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
11989 
11990  Register DstReg = MI.getOperand(0).getReg();
11991  Register NegSizeReg = MI.getOperand(1).getReg();
11992  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
11993  Register FinalStackPtr = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11994  Register FramePointer = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11995  Register ActualNegSizeReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
11996 
11997  // Since the value of NegSizeReg might be realigned during prologue/epilogue
11998  // insertion, use a PREPARE_PROBED_ALLOCA pseudo instruction to get the
11999  // actual FramePointer and NegSize.
12000  unsigned ProbeOpc;
12001  if (!MRI.hasOneNonDBGUse(NegSizeReg))
12002  ProbeOpc =
12003  isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_64 : PPC::PREPARE_PROBED_ALLOCA_32;
12004  else
12005  // By introducing PREPARE_PROBED_ALLOCA_NEGSIZE_OPT, ActualNegSizeReg
12006  // and NegSizeReg will be allocated in the same phyreg to avoid
12007  // redundant copy when NegSizeReg has only one use which is current MI and
12008  // will be replaced by PREPARE_PROBED_ALLOCA then.
12009  ProbeOpc = isPPC64 ? PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_64
12010  : PPC::PREPARE_PROBED_ALLOCA_NEGSIZE_SAME_REG_32;
12011  BuildMI(*MBB, {MI}, DL, TII->get(ProbeOpc), FramePointer)
12012  .addDef(ActualNegSizeReg)
12013  .addReg(NegSizeReg)
12014  .add(MI.getOperand(2))
12015  .add(MI.getOperand(3));
12016 
12017  // Calculate final stack pointer, which equals to SP + ActualNegSize.
12018  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
12019  FinalStackPtr)
12020  .addReg(SPReg)
12021  .addReg(ActualNegSizeReg);
12022 
12023  // Materialize a scratch register for update.
12024  int64_t NegProbeSize = -(int64_t)ProbeSize;
12025  assert(isInt<32>(NegProbeSize) && "Unhandled probe size!");
12026  Register ScratchReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
       // Too large for a single LI immediate: build it with lis + ori.
12027  if (!isInt<16>(NegProbeSize)) {
12028  Register TempReg = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12029  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
12030  .addImm(NegProbeSize >> 16);
12031  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::ORI8 : PPC::ORI),
12032  ScratchReg)
12033  .addReg(TempReg)
12034  .addImm(NegProbeSize & 0xFFFF);
12035  } else
12036  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::LI8 : PPC::LI), ScratchReg)
12037  .addImm(NegProbeSize);
12038 
12039  {
12040  // Probing leading residual part.
       // NegMod = ActualNegSize - (ActualNegSize / -ProbeSize) * -ProbeSize,
       // i.e. the (negative) remainder that is not a whole probe block;
       // probe it once with a store-with-update.
12041  Register Div = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12042  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::DIVD : PPC::DIVW), Div)
12043  .addReg(ActualNegSizeReg)
12044  .addReg(ScratchReg);
12045  Register Mul = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12046  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::MULLD : PPC::MULLW), Mul)
12047  .addReg(Div)
12048  .addReg(ScratchReg);
12049  Register NegMod = MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12050  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), NegMod)
12051  .addReg(Mul)
12052  .addReg(ActualNegSizeReg);
12053  BuildMI(*MBB, {MI}, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12054  .addReg(FramePointer)
12055  .addReg(SPReg)
12056  .addReg(NegMod);
12057  }
12058 
12059  {
12060  // Remaining part should be multiple of ProbeSize.
       // Loop header: exit when SP has reached FinalStackPtr.
       // NOTE(review): the predicate operand of this BCC (expected
       // PRED_EQ) is not visible in this view — verify upstream.
12061  Register CmpResult = MRI.createVirtualRegister(&PPC::CRRCRegClass);
12062  BuildMI(TestMBB, DL, TII->get(isPPC64 ? PPC::CMPD : PPC::CMPW), CmpResult)
12063  .addReg(SPReg)
12064  .addReg(FinalStackPtr);
12065  BuildMI(TestMBB, DL, TII->get(PPC::BCC))
12067  .addReg(CmpResult)
12068  .addMBB(TailMBB);
12069  TestMBB->addSuccessor(BlockMBB);
12070  TestMBB->addSuccessor(TailMBB);
12071  }
12072 
12073  {
12074  // Touch the block.
12075  // |P...|P...|P...
12076  BuildMI(BlockMBB, DL, TII->get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
12077  .addReg(FramePointer)
12078  .addReg(SPReg)
12079  .addReg(ScratchReg);
12080  BuildMI(BlockMBB, DL, TII->get(PPC::B)).addMBB(TestMBB);
12081  BlockMBB->addSuccessor(TestMBB);
12082  }
12083 
12084  // Calculation of MaxCallFrameSize is deferred to prologue/epilogue
12085  // insertion; use the DYNAREAOFFSET pseudo instruction to get the future
12086  // result.
12087  Register MaxCallFrameSizeReg =
12088  MRI.createVirtualRegister(isPPC64 ? G8RC : GPRC);
12089  BuildMI(TailMBB, DL,
12090  TII->get(isPPC64 ? PPC::DYNAREAOFFSET8 : PPC::DYNAREAOFFSET),
12091  MaxCallFrameSizeReg)
12092  .add(MI.getOperand(2))
12093  .add(MI.getOperand(3));
12094  BuildMI(TailMBB, DL, TII->get(isPPC64 ? PPC::ADD8 : PPC::ADD4), DstReg)
12095  .addReg(SPReg)
12096  .addReg(MaxCallFrameSizeReg);
12097 
12098  // Splice instructions after MI to TailMBB.
12099  TailMBB->splice(TailMBB->end(), MBB,
12100  std::next(MachineBasicBlock::iterator(MI)), MBB->end());
12101  MBB->addSuccessor(TestMBB);
12102 
12103  // Delete the pseudo instruction.
12104  MI.eraseFromParent();
12105 
12106  ++NumDynamicAllocaProbed;
12107  return TailMBB;
12108 }
12109 
12112  MachineBasicBlock *BB) const {
12113  if (MI.getOpcode() == TargetOpcode::STACKMAP ||
12114  MI.getOpcode() == TargetOpcode::PATCHPOINT) {
12115  if (Subtarget.is64BitELFABI() &&
12116  MI.getOpcode() == TargetOpcode::PATCHPOINT &&
12117  !Subtarget.isUsingPCRelativeCalls()) {
12118  // Call lowering should have added an r2 operand to indicate a dependence
12119  // on the TOC base pointer value. It can't however, because there is no
12120  // way to mark the dependence as implicit there, and so the stackmap code
12121  // will confuse it with a regular operand. Instead, add the dependence
12122  // here.
12123  MI.addOperand(MachineOperand::CreateReg(PPC::X2, false, true));
12124  }
12125 
12126  return emitPatchPoint(MI, BB);
12127  }
12128 
12129  if (MI.getOpcode() == PPC::EH_SjLj_SetJmp32 ||
12130  MI.getOpcode() == PPC::EH_SjLj_SetJmp64) {
12131  return emitEHSjLjSetJmp(MI, BB);
12132  } else if (MI.getOpcode() == PPC::EH_SjLj_LongJmp32 ||
12133  MI.getOpcode() == PPC::EH_SjLj_LongJmp64) {
12134  return emitEHSjLjLongJmp(MI, BB);
12135  }
12136 
12137  const TargetInstrInfo *TII = Subtarget.getInstrInfo();
12138 
12139  // To "insert" these instructions we actually have to insert their
12140  // control-flow patterns.
12141  const BasicBlock *LLVM_BB = BB->getBasicBlock();
12142  MachineFunction::iterator It = ++BB->getIterator();
12143 
12144  MachineFunction *F = BB->getParent();
12145  MachineRegisterInfo &MRI = F->getRegInfo();
12146 
12147  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12148  MI.getOpcode() == PPC::SELECT_CC_I8 || MI.getOpcode() == PPC::SELECT_I4 ||
12149  MI.getOpcode() == PPC::SELECT_I8) {
12151  if (MI.getOpcode() == PPC::SELECT_CC_I4 ||
12152  MI.getOpcode() == PPC::SELECT_CC_I8)
12153  Cond.push_back(MI.getOperand(4));
12154  else
12156  Cond.push_back(MI.getOperand(1));
12157 
12158  DebugLoc dl = MI.getDebugLoc();
12159  TII->insertSelect(*BB, MI, dl, MI.getOperand(0).getReg(), Cond,
12160  MI.getOperand(2).getReg(), MI.getOperand(3).getReg());
12161  } else if (MI.getOpcode() == PPC::SELECT_CC_F4 ||
12162  MI.getOpcode() == PPC::SELECT_CC_F8 ||
12163  MI.getOpcode() == PPC::SELECT_CC_F16 ||
12164  MI.getOpcode() == PPC::SELECT_CC_VRRC ||
12165  MI.getOpcode() == PPC::SELECT_CC_VSFRC ||
12166  MI.getOpcode() == PPC::SELECT_CC_VSSRC ||
12167  MI.getOpcode() == PPC::SELECT_CC_VSRC ||
12168  MI.getOpcode() == PPC::SELECT_CC_SPE4 ||
12169  MI.getOpcode() == PPC::SELECT_CC_SPE ||
12170  MI.getOpcode() == PPC::SELECT_F4 ||
12171  MI.getOpcode() == PPC::SELECT_F8 ||
12172  MI.getOpcode() == PPC::SELECT_F16 ||
12173  MI.getOpcode() == PPC::SELECT_SPE ||
12174  MI.getOpcode() == PPC::SELECT_SPE4 ||
12175  MI.getOpcode() == PPC::SELECT_VRRC ||
12176  MI.getOpcode() == PPC::SELECT_VSFRC ||
12177  MI.getOpcode() == PPC::SELECT_VSSRC ||
12178  MI.getOpcode() == PPC::SELECT_VSRC) {
12179  // The incoming instruction knows the destination vreg to set, the
12180  // condition code register to branch on, the true/false values to
12181  // select between, and a branch opcode to use.
12182 
12183  // thisMBB:
12184  // ...
12185  // TrueVal = ...
12186  // cmpTY ccX, r1, r2
12187  // bCC copy1MBB
12188  // fallthrough --> copy0MBB
12189  MachineBasicBlock *thisMBB = BB;
12190  MachineBasicBlock *copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
12191  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12192  DebugLoc dl = MI.getDebugLoc();
12193  F->insert(It, copy0MBB);
12194  F->insert(It, sinkMBB);
12195 
12196  // Transfer the remainder of BB and its successor edges to sinkMBB.
12197  sinkMBB->splice(sinkMBB->begin(), BB,
12198  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12200 
12201  // Next, add the true and fallthrough blocks as its successors.
12202  BB->addSuccessor(copy0MBB);
12203  BB->addSuccessor(sinkMBB);
12204 
12205  if (MI.getOpcode() == PPC::SELECT_I4 || MI.getOpcode() == PPC::SELECT_I8 ||
12206  MI.getOpcode() == PPC::SELECT_F4 || MI.getOpcode() == PPC::SELECT_F8 ||
12207  MI.getOpcode() == PPC::SELECT_F16 ||
12208  MI.getOpcode() == PPC::SELECT_SPE4 ||
12209  MI.getOpcode() == PPC::SELECT_SPE ||
12210  MI.getOpcode() == PPC::SELECT_VRRC ||
12211  MI.getOpcode() == PPC::SELECT_VSFRC ||
12212  MI.getOpcode() == PPC::SELECT_VSSRC ||
12213  MI.getOpcode() == PPC::SELECT_VSRC) {
12214  BuildMI(BB, dl, TII->get(PPC::BC))
12215  .addReg(MI.getOperand(1).getReg())
12216  .addMBB(sinkMBB);
12217  } else {
12218  unsigned SelectPred = MI.getOperand(4).getImm();
12219  BuildMI(BB, dl, TII->get(PPC::BCC))
12220  .addImm(SelectPred)
12221  .addReg(MI.getOperand(1).getReg())
12222  .addMBB(sinkMBB);
12223  }
12224 
12225  // copy0MBB:
12226  // %FalseValue = ...
12227  // # fallthrough to sinkMBB
12228  BB = copy0MBB;
12229 
12230  // Update machine-CFG edges
12231  BB->addSuccessor(sinkMBB);
12232 
12233  // sinkMBB:
12234  // %Result = phi [ %FalseValue, copy0MBB ], [ %TrueValue, thisMBB ]
12235  // ...
12236  BB = sinkMBB;
12237  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::PHI), MI.getOperand(0).getReg())
12238  .addReg(MI.getOperand(3).getReg())
12239  .addMBB(copy0MBB)
12240  .addReg(MI.getOperand(2).getReg())
12241  .addMBB(thisMBB);
12242  } else if (MI.getOpcode() == PPC::ReadTB) {
12243  // To read the 64-bit time-base register on a 32-bit target, we read the
12244  // two halves. Should the counter have wrapped while it was being read, we
12245  // need to try again.
12246  // ...
12247  // readLoop:
12248  // mfspr Rx,TBU # load from TBU
12249  // mfspr Ry,TB # load from TB
12250  // mfspr Rz,TBU # load from TBU
12251  // cmpw crX,Rx,Rz # check if 'old'='new'
12252  // bne readLoop # branch if they're not equal
12253  // ...
12254 
12255  MachineBasicBlock *readMBB = F->CreateMachineBasicBlock(LLVM_BB);
12256  MachineBasicBlock *sinkMBB = F->CreateMachineBasicBlock(LLVM_BB);
12257  DebugLoc dl = MI.getDebugLoc();
12258  F->insert(It, readMBB);
12259  F->insert(It, sinkMBB);
12260 
12261  // Transfer the remainder of BB and its successor edges to sinkMBB.
12262  sinkMBB->splice(sinkMBB->begin(), BB,
12263  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12265 
12266  BB->addSuccessor(readMBB);
12267  BB = readMBB;
12268 
12269  MachineRegisterInfo &RegInfo = F->getRegInfo();
12270  Register ReadAgainReg = RegInfo.createVirtualRegister(&PPC::GPRCRegClass);
12271  Register LoReg = MI.getOperand(0).getReg();
12272  Register HiReg = MI.getOperand(1).getReg();
12273 
12274  BuildMI(BB, dl, TII->get(PPC::MFSPR), HiReg).addImm(269);
12275  BuildMI(BB, dl, TII->get(PPC::MFSPR), LoReg).addImm(268);
12276  BuildMI(BB, dl, TII->get(PPC::MFSPR), ReadAgainReg).addImm(269);
12277 
12278  Register CmpReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12279 
12280  BuildMI(BB, dl, TII->get(PPC::CMPW), CmpReg)
12281  .addReg(HiReg)
12282  .addReg(ReadAgainReg);
12283  BuildMI(BB, dl, TII->get(PPC::BCC))
12285  .addReg(CmpReg)
12286  .addMBB(readMBB);
12287 
12288  BB->addSuccessor(readMBB);
12289  BB->addSuccessor(sinkMBB);
12290  } else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I8)
12291  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::ADD4);
12292  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I16)
12293  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::ADD4);
12294  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I32)
12295  BB = EmitAtomicBinary(MI, BB, 4, PPC::ADD4);
12296  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_ADD_I64)
12297  BB = EmitAtomicBinary(MI, BB, 8, PPC::ADD8);
12298 
12299  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I8)
12301  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I16)
12303  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I32)
12304  BB = EmitAtomicBinary(MI, BB, 4, PPC::AND);
12305  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_AND_I64)
12306  BB = EmitAtomicBinary(MI, BB, 8, PPC::AND8);
12307 
12308  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I8)
12310  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I16)
12311  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::OR);
12312  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I32)
12313  BB = EmitAtomicBinary(MI, BB, 4, PPC::OR);
12314  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_OR_I64)
12315  BB = EmitAtomicBinary(MI, BB, 8, PPC::OR8);
12316 
12317  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I8)
12319  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I16)
12321  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I32)
12322  BB = EmitAtomicBinary(MI, BB, 4, PPC::XOR);
12323  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_XOR_I64)
12324  BB = EmitAtomicBinary(MI, BB, 8, PPC::XOR8);
12325 
12326  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I8)
12327  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::NAND);
12328  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I16)
12329  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::NAND);
12330  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I32)
12331  BB = EmitAtomicBinary(MI, BB, 4, PPC::NAND);
12332  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_NAND_I64)
12333  BB = EmitAtomicBinary(MI, BB, 8, PPC::NAND8);
12334 
12335  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I8)
12336  BB = EmitPartwordAtomicBinary(MI, BB, true, PPC::SUBF);
12337  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I16)
12338  BB = EmitPartwordAtomicBinary(MI, BB, false, PPC::SUBF);
12339  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I32)
12340  BB = EmitAtomicBinary(MI, BB, 4, PPC::SUBF);
12341  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_SUB_I64)
12342  BB = EmitAtomicBinary(MI, BB, 8, PPC::SUBF8);
12343 
12344  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I8)
12345  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_GE);
12346  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I16)
12347  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_GE);
12348  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I32)
12349  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_GE);
12350  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MIN_I64)
12351  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_GE);
12352 
12353  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I8)
12354  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPW, PPC::PRED_LE);
12355  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I16)
12356  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPW, PPC::PRED_LE);
12357  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I32)
12358  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPW, PPC::PRED_LE);
12359  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_MAX_I64)
12360  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPD, PPC::PRED_LE);
12361 
12362  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I8)
12363  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_GE);
12364  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I16)
12365  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_GE);
12366  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I32)
12367  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_GE);
12368  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMIN_I64)
12369  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_GE);
12370 
12371  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I8)
12372  BB = EmitPartwordAtomicBinary(MI, BB, true, 0, PPC::CMPLW, PPC::PRED_LE);
12373  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I16)
12374  BB = EmitPartwordAtomicBinary(MI, BB, false, 0, PPC::CMPLW, PPC::PRED_LE);
12375  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I32)
12376  BB = EmitAtomicBinary(MI, BB, 4, 0, PPC::CMPLW, PPC::PRED_LE);
12377  else if (MI.getOpcode() == PPC::ATOMIC_LOAD_UMAX_I64)
12378  BB = EmitAtomicBinary(MI, BB, 8, 0, PPC::CMPLD, PPC::PRED_LE);
12379 
12380  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I8)
12381  BB = EmitPartwordAtomicBinary(MI, BB, true, 0);
12382  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I16)
12383  BB = EmitPartwordAtomicBinary(MI, BB, false, 0);
12384  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I32)
12385  BB = EmitAtomicBinary(MI, BB, 4, 0);
12386  else if (MI.getOpcode() == PPC::ATOMIC_SWAP_I64)
12387  BB = EmitAtomicBinary(MI, BB, 8, 0);
12388  else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I32 ||
12389  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64 ||
12390  (Subtarget.hasPartwordAtomics() &&
12391  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8) ||
12392  (Subtarget.hasPartwordAtomics() &&
12393  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16)) {
12394  bool is64bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I64;
12395 
12396  auto LoadMnemonic = PPC::LDARX;
12397  auto StoreMnemonic = PPC::STDCX;
12398  switch (MI.getOpcode()) {
12399  default:
12400  llvm_unreachable("Compare and swap of unknown size");
12401  case PPC::ATOMIC_CMP_SWAP_I8:
12402  LoadMnemonic = PPC::LBARX;
12403  StoreMnemonic = PPC::STBCX;
12404  assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12405  break;
12406  case PPC::ATOMIC_CMP_SWAP_I16:
12407  LoadMnemonic = PPC::LHARX;
12408  StoreMnemonic = PPC::STHCX;
12409  assert(Subtarget.hasPartwordAtomics() && "No support partword atomics.");
12410  break;
12411  case PPC::ATOMIC_CMP_SWAP_I32:
12412  LoadMnemonic = PPC::LWARX;
12413  StoreMnemonic = PPC::STWCX;
12414  break;
12415  case PPC::ATOMIC_CMP_SWAP_I64:
12416  LoadMnemonic = PPC::LDARX;
12417  StoreMnemonic = PPC::STDCX;
12418  break;
12419  }
12420  Register dest = MI.getOperand(0).getReg();
12421  Register ptrA = MI.getOperand(1).getReg();
12422  Register ptrB = MI.getOperand(2).getReg();
12423  Register oldval = MI.getOperand(3).getReg();
12424  Register newval = MI.getOperand(4).getReg();
12425  DebugLoc dl = MI.getDebugLoc();
12426 
12427  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12428  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12429  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12430  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12431  F->insert(It, loop1MBB);
12432  F->insert(It, loop2MBB);
12433  F->insert(It, midMBB);
12434  F->insert(It, exitMBB);
12435  exitMBB->splice(exitMBB->begin(), BB,
12436  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12438 
12439  // thisMBB:
12440  // ...
12441  // fallthrough --> loopMBB
12442  BB->addSuccessor(loop1MBB);
12443 
12444  // loop1MBB:
12445  // l[bhwd]arx dest, ptr
12446  // cmp[wd] dest, oldval
12447  // bne- midMBB
12448  // loop2MBB:
12449  // st[bhwd]cx. newval, ptr
12450  // bne- loopMBB
12451  // b exitBB
12452  // midMBB:
12453  // st[bhwd]cx. dest, ptr
12454  // exitBB:
12455  BB = loop1MBB;
12456  BuildMI(BB, dl, TII->get(LoadMnemonic), dest).addReg(ptrA).addReg(ptrB);
12457  BuildMI(BB, dl, TII->get(is64bit ? PPC::CMPD : PPC::CMPW), PPC::CR0)
12458  .addReg(oldval)
12459  .addReg(dest);
12460  BuildMI(BB, dl, TII->get(PPC::BCC))
12462  .addReg(PPC::CR0)
12463  .addMBB(midMBB);
12464  BB->addSuccessor(loop2MBB);
12465  BB->addSuccessor(midMBB);
12466 
12467  BB = loop2MBB;
12468  BuildMI(BB, dl, TII->get(StoreMnemonic))
12469  .addReg(newval)
12470  .addReg(ptrA)
12471  .addReg(ptrB);
12472  BuildMI(BB, dl, TII->get(PPC::BCC))
12474  .addReg(PPC::CR0)
12475  .addMBB(loop1MBB);
12476  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12477  BB->addSuccessor(loop1MBB);
12478  BB->addSuccessor(exitMBB);
12479 
12480  BB = midMBB;
12481  BuildMI(BB, dl, TII->get(StoreMnemonic))
12482  .addReg(dest)
12483  .addReg(ptrA)
12484  .addReg(ptrB);
12485  BB->addSuccessor(exitMBB);
12486 
12487  // exitMBB:
12488  // ...
12489  BB = exitMBB;
12490  } else if (MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8 ||
12491  MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I16) {
12492  // We must use 64-bit registers for addresses when targeting 64-bit,
12493  // since we're actually doing arithmetic on them. Other registers
12494  // can be 32-bit.
12495  bool is64bit = Subtarget.isPPC64();
12496  bool isLittleEndian = Subtarget.isLittleEndian();
12497  bool is8bit = MI.getOpcode() == PPC::ATOMIC_CMP_SWAP_I8;
12498 
12499  Register dest = MI.getOperand(0).getReg();
12500  Register ptrA = MI.getOperand(1).getReg();
12501  Register ptrB = MI.getOperand(2).getReg();
12502  Register oldval = MI.getOperand(3).getReg();
12503  Register newval = MI.getOperand(4).getReg();
12504  DebugLoc dl = MI.getDebugLoc();
12505 
12506  MachineBasicBlock *loop1MBB = F->CreateMachineBasicBlock(LLVM_BB);
12507  MachineBasicBlock *loop2MBB = F->CreateMachineBasicBlock(LLVM_BB);
12508  MachineBasicBlock *midMBB = F->CreateMachineBasicBlock(LLVM_BB);
12509  MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
12510  F->insert(It, loop1MBB);
12511  F->insert(It, loop2MBB);
12512  F->insert(It, midMBB);
12513  F->insert(It, exitMBB);
12514  exitMBB->splice(exitMBB->begin(), BB,
12515  std::next(MachineBasicBlock::iterator(MI)), BB->end());
12517 
12518  MachineRegisterInfo &RegInfo = F->getRegInfo();
12519  const TargetRegisterClass *RC =
12520  is64bit ? &PPC::G8RCRegClass : &PPC::GPRCRegClass;
12521  const TargetRegisterClass *GPRC = &PPC::GPRCRegClass;
12522 
12523  Register PtrReg = RegInfo.createVirtualRegister(RC);
12524  Register Shift1Reg = RegInfo.createVirtualRegister(GPRC);
12525  Register ShiftReg =
12526  isLittleEndian ? Shift1Reg : RegInfo.createVirtualRegister(GPRC);
12527  Register NewVal2Reg = RegInfo.createVirtualRegister(GPRC);
12528  Register NewVal3Reg = RegInfo.createVirtualRegister(GPRC);
12529  Register OldVal2Reg = RegInfo.createVirtualRegister(GPRC);
12530  Register OldVal3Reg = RegInfo.createVirtualRegister(GPRC);
12531  Register MaskReg = RegInfo.createVirtualRegister(GPRC);
12532  Register Mask2Reg = RegInfo.createVirtualRegister(GPRC);
12533  Register Mask3Reg = RegInfo.createVirtualRegister(GPRC);
12534  Register Tmp2Reg = RegInfo.createVirtualRegister(GPRC);
12535  Register Tmp4Reg = RegInfo.createVirtualRegister(GPRC);
12536  Register TmpDestReg = RegInfo.createVirtualRegister(GPRC);
12537  Register Ptr1Reg;
12538  Register TmpReg = RegInfo.createVirtualRegister(GPRC);
12539  Register ZeroReg = is64bit ? PPC::ZERO8 : PPC::ZERO;
12540  // thisMBB:
12541  // ...
12542  // fallthrough --> loopMBB
12543  BB->addSuccessor(loop1MBB);
12544 
12545  // The 4-byte load must be aligned, while a char or short may be
12546  // anywhere in the word. Hence all this nasty bookkeeping code.
12547  // add ptr1, ptrA, ptrB [copy if ptrA==0]
12548  // rlwinm shift1, ptr1, 3, 27, 28 [3, 27, 27]
12549  // xori shift, shift1, 24 [16]
12550  // rlwinm ptr, ptr1, 0, 0, 29
12551  // slw newval2, newval, shift
12552  // slw oldval2, oldval,shift
12553  // li mask2, 255 [li mask3, 0; ori mask2, mask3, 65535]
12554  // slw mask, mask2, shift
12555  // and newval3, newval2, mask
12556  // and oldval3, oldval2, mask
12557  // loop1MBB:
12558  // lwarx tmpDest, ptr
12559  // and tmp, tmpDest, mask
12560  // cmpw tmp, oldval3
12561  // bne- midMBB
12562  // loop2MBB:
12563  // andc tmp2, tmpDest, mask
12564  // or tmp4, tmp2, newval3
12565  // stwcx. tmp4, ptr
12566  // bne- loop1MBB
12567  // b exitBB
12568  // midMBB:
12569  // stwcx. tmpDest, ptr
12570  // exitBB:
12571  // srw dest, tmpDest, shift
12572  if (ptrA != ZeroReg) {
12573  Ptr1Reg = RegInfo.createVirtualRegister(RC);
12574  BuildMI(BB, dl, TII->get(is64bit ? PPC::ADD8 : PPC::ADD4), Ptr1Reg)
12575  .addReg(ptrA)
12576  .addReg(ptrB);
12577  } else {
12578  Ptr1Reg = ptrB;
12579  }
12580 
12581  // We need use 32-bit subregister to avoid mismatch register class in 64-bit
12582  // mode.
12583  BuildMI(BB, dl, TII->get(PPC::RLWINM), Shift1Reg)
12584  .addReg(Ptr1Reg, 0, is64bit ? PPC::sub_32 : 0)
12585  .addImm(3)
12586  .addImm(27)
12587  .addImm(is8bit ? 28 : 27);
12588  if (!isLittleEndian)
12589  BuildMI(BB, dl, TII->get(PPC::XORI), ShiftReg)
12590  .addReg(Shift1Reg)
12591  .addImm(is8bit ? 24 : 16);
12592  if (is64bit)
12593  BuildMI(BB, dl, TII->get(PPC::RLDICR), PtrReg)
12594  .addReg(Ptr1Reg)
12595  .addImm(0)
12596  .addImm(61);
12597  else
12598  BuildMI(BB, dl, TII->get(PPC::RLWINM), PtrReg)
12599  .addReg(Ptr1Reg)
12600  .addImm(0)
12601  .addImm(0)
12602  .addImm(29);
12603  BuildMI(BB, dl, TII->get(PPC::SLW), NewVal2Reg)
12604  .addReg(newval)
12605  .addReg(ShiftReg);
12606  BuildMI(BB, dl, TII->get(PPC::SLW), OldVal2Reg)
12607  .addReg(oldval)
12608  .addReg(ShiftReg);
12609  if (is8bit)
12610  BuildMI(BB, dl, TII->get(PPC::LI), Mask2Reg).addImm(255);
12611  else {
12612  BuildMI(BB, dl, TII->get(PPC::LI), Mask3Reg).addImm(0);
12613  BuildMI(BB, dl, TII->get(PPC::ORI), Mask2Reg)
12614  .addReg(Mask3Reg)
12615  .addImm(65535);
12616  }
12617  BuildMI(BB, dl, TII->get(PPC::SLW), MaskReg)
12618  .addReg(Mask2Reg)
12619  .addReg(ShiftReg);
12620  BuildMI(BB, dl, TII->get(PPC::AND), NewVal3Reg)
12621  .addReg(NewVal2Reg)
12622  .addReg(MaskReg);
12623  BuildMI(BB, dl, TII->get(PPC::AND), OldVal3Reg)
12624  .addReg(OldVal2Reg)
12625  .addReg(MaskReg);
12626 
12627  BB = loop1MBB;
12628  BuildMI(BB, dl, TII->get(PPC::LWARX), TmpDestReg)
12629  .addReg(ZeroReg)
12630  .addReg(PtrReg);
12631  BuildMI(BB, dl, TII->get(PPC::AND), TmpReg)
12632  .addReg(TmpDestReg)
12633  .addReg(MaskReg);
12634  BuildMI(BB, dl, TII->get(PPC::CMPW), PPC::CR0)
12635  .addReg(TmpReg)
12636  .addReg(OldVal3Reg);
12637  BuildMI(BB, dl, TII->get(PPC::BCC))
12639  .addReg(PPC::CR0)
12640  .addMBB(midMBB);
12641  BB->addSuccessor(loop2MBB);
12642  BB->addSuccessor(midMBB);
12643 
12644  BB = loop2MBB;
12645  BuildMI(BB, dl, TII->get(PPC::ANDC), Tmp2Reg)
12646  .addReg(TmpDestReg)
12647  .addReg(MaskReg);
12648  BuildMI(BB, dl, TII->get(PPC::OR), Tmp4Reg)
12649  .addReg(Tmp2Reg)
12650  .addReg(NewVal3Reg);
12651  BuildMI(BB, dl, TII->get(PPC::STWCX))
12652  .addReg(Tmp4Reg)
12653  .addReg(ZeroReg)
12654  .addReg(PtrReg);
12655  BuildMI(BB, dl, TII->get(PPC::BCC))
12657  .addReg(PPC::CR0)
12658  .addMBB(loop1MBB);
12659  BuildMI(BB, dl, TII->get(PPC::B)).addMBB(exitMBB);
12660  BB->addSuccessor(loop1MBB);
12661  BB->addSuccessor(exitMBB);
12662 
12663  BB = midMBB;
12664  BuildMI(BB, dl, TII->get(PPC::STWCX))
12665  .addReg(TmpDestReg)
12666  .addReg(ZeroReg)
12667  .addReg(PtrReg);
12668  BB->addSuccessor(exitMBB);
12669 
12670  // exitMBB:
12671  // ...
12672  BB = exitMBB;
12673  BuildMI(*BB, BB->begin(), dl, TII->get(PPC::SRW), dest)
12674  .addReg(TmpReg)
12675  .addReg(ShiftReg);
12676  } else if (MI.getOpcode() == PPC::FADDrtz) {
12677  // This pseudo performs an FADD with rounding mode temporarily forced
12678  // to round-to-zero. We emit this via custom inserter since the FPSCR
12679  // is not modeled at the SelectionDAG level.
12680  Register Dest = MI.getOperand(0).getReg();
12681  Register Src1 = MI.getOperand(1).getReg();
12682  Register Src2 = MI.getOperand(2).getReg();
12683  DebugLoc dl = MI.getDebugLoc();
12684 
12685  MachineRegisterInfo &RegInfo = F->getRegInfo();
12686  Register MFFSReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12687 
12688  // Save FPSCR value.
12689  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), MFFSReg);
12690 
12691  // Set rounding mode to round-to-zero.
12692  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB1))
12693  .addImm(31)
12695 
12696  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSB0))
12697  .addImm(30)
12699 
12700  // Perform addition.
12701  auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
12702  .addReg(Src1)
12703  .addReg(Src2);
12704  if (MI.getFlag(MachineInstr::NoFPExcept))
12706 
12707  // Restore FPSCR value.
12708  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
12709  } else if (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12710  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT ||
12711  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12712  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8) {
12713  unsigned Opcode = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8 ||
12714  MI.getOpcode() == PPC::ANDI_rec_1_GT_BIT8)
12715  ? PPC::ANDI8_rec
12716  : PPC::ANDI_rec;
12717  bool IsEQ = (MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT ||
12718  MI.getOpcode() == PPC::ANDI_rec_1_EQ_BIT8);
12719 
12720  MachineRegisterInfo &RegInfo = F->getRegInfo();
12721  Register Dest = RegInfo.createVirtualRegister(
12722  Opcode == PPC::ANDI_rec ? &PPC::GPRCRegClass : &PPC::G8RCRegClass);
12723 
12724  DebugLoc Dl = MI.getDebugLoc();
12725  BuildMI(*BB, MI, Dl, TII->get(Opcode), Dest)
12726  .addReg(MI.getOperand(1).getReg())
12727  .addImm(1);
12728  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12729  MI.getOperand(0).getReg())
12730  .addReg(IsEQ ? PPC::CR0EQ : PPC::CR0GT);
12731  } else if (MI.getOpcode() == PPC::TCHECK_RET) {
12732  DebugLoc Dl = MI.getDebugLoc();
12733  MachineRegisterInfo &RegInfo = F->getRegInfo();
12734  Register CRReg = RegInfo.createVirtualRegister(&PPC::CRRCRegClass);
12735  BuildMI(*BB, MI, Dl, TII->get(PPC::TCHECK), CRReg);
12736  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12737  MI.getOperand(0).getReg())
12738  .addReg(CRReg);
12739  } else if (MI.getOpcode() == PPC::TBEGIN_RET) {
12740  DebugLoc Dl = MI.getDebugLoc();
12741  unsigned Imm = MI.getOperand(1).getImm();
12742  BuildMI(*BB, MI, Dl, TII->get(PPC::TBEGIN)).addImm(Imm);
12743  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::COPY),
12744  MI.getOperand(0).getReg())
12745  .addReg(PPC::CR0EQ);
12746  } else if (MI.getOpcode() == PPC::SETRNDi) {
12747  DebugLoc dl = MI.getDebugLoc();
12748  Register OldFPSCRReg = MI.getOperand(0).getReg();
12749 
12750  // Save FPSCR value.
12751  if (MRI.use_empty(OldFPSCRReg))
12752  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
12753  else
12754  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12755 
12756  // The floating point rounding mode is in the bits 62:63 of FPCSR, and has
12757  // the following settings:
12758  // 00 Round to nearest
12759  // 01 Round to 0
12760  // 10 Round to +inf
12761  // 11 Round to -inf
12762 
12763  // When the operand is immediate, using the two least significant bits of
12764  // the immediate to set the bits 62:63 of FPSCR.
12765  unsigned Mode = MI.getOperand(1).getImm();
12766  BuildMI(*BB, MI, dl, TII->get((Mode & 1) ? PPC::MTFSB1 : PPC::MTFSB0))
12767  .addImm(31)
12769 
12770  BuildMI(*BB, MI, dl, TII->get((Mode & 2) ? PPC::MTFSB1 : PPC::MTFSB0))
12771  .addImm(30)
12773  } else if (MI.getOpcode() == PPC::SETRND) {
12774  DebugLoc dl = MI.getDebugLoc();
12775 
12776  // Copy register from F8RCRegClass::SrcReg to G8RCRegClass::DestReg
12777  // or copy register from G8RCRegClass::SrcReg to F8RCRegClass::DestReg.
12778  // If the target doesn't have DirectMove, we should use stack to do the
12779  // conversion, because the target doesn't have the instructions like mtvsrd
12780  // or mfvsrd to do this conversion directly.
12781  auto copyRegFromG8RCOrF8RC = [&] (unsigned DestReg, unsigned SrcReg) {
12782  if (Subtarget.hasDirectMove()) {
12783  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), DestReg)
12784  .addReg(SrcReg);
12785  } else {
12786  // Use stack to do the register copy.
12787  unsigned StoreOp = PPC::STD, LoadOp = PPC::LFD;
12788  MachineRegisterInfo &RegInfo = F->getRegInfo();
12789  const TargetRegisterClass *RC = RegInfo.getRegClass(SrcReg);
12790  if (RC == &PPC::F8RCRegClass) {
12791  // Copy register from F8RCRegClass to G8RCRegclass.
12792  assert((RegInfo.getRegClass(DestReg) == &PPC::G8RCRegClass) &&
12793  "Unsupported RegClass.");
12794 
12795  StoreOp = PPC::STFD;
12796  LoadOp = PPC::LD;
12797  } else {
12798  // Copy register from G8RCRegClass to F8RCRegclass.
12799  assert((RegInfo.getRegClass(SrcReg) == &PPC::G8RCRegClass) &&
12800  (RegInfo.getRegClass(DestReg) == &PPC::F8RCRegClass) &&
12801  "Unsupported RegClass.");
12802  }
12803 
12804  MachineFrameInfo &MFI = F->getFrameInfo();
12805  int FrameIdx = MFI.CreateStackObject(8, Align(8), false);
12806 
12807  MachineMemOperand *MMOStore = F->getMachineMemOperand(
12808  MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12810  MFI.getObjectAlign(FrameIdx));
12811 
12812  // Store the SrcReg into the stack.
12813  BuildMI(*BB, MI, dl, TII->get(StoreOp))
12814  .addReg(SrcReg)
12815  .addImm(0)
12816  .addFrameIndex(FrameIdx)
12817  .addMemOperand(MMOStore);
12818 
12819  MachineMemOperand *MMOLoad = F->getMachineMemOperand(
12820  MachinePointerInfo::getFixedStack(*F, FrameIdx, 0),
12822  MFI.getObjectAlign(FrameIdx));
12823 
12824  // Load from the stack where SrcReg is stored, and save to DestReg,
12825  // so we have done the RegClass conversion from RegClass::SrcReg to
12826  // RegClass::DestReg.
12827  BuildMI(*BB, MI, dl, TII->get(LoadOp), DestReg)
12828  .addImm(0)
12829  .addFrameIndex(FrameIdx)
12830  .addMemOperand(MMOLoad);
12831  }
12832  };
12833 
12834  Register OldFPSCRReg = MI.getOperand(0).getReg();
12835 
12836  // Save FPSCR value.
12837  BuildMI(*BB, MI, dl, TII->get(PPC::MFFS), OldFPSCRReg);
12838 
12839  // When the operand is gprc register, use two least significant bits of the
12840  // register and mtfsf instruction to set the bits 62:63 of FPSCR.
12841  //
12842  // copy OldFPSCRTmpReg, OldFPSCRReg
12843  // (INSERT_SUBREG ExtSrcReg, (IMPLICIT_DEF ImDefReg), SrcOp, 1)
12844  // rldimi NewFPSCRTmpReg, ExtSrcReg, OldFPSCRReg, 0, 62
12845  // copy NewFPSCRReg, NewFPSCRTmpReg
12846  // mtfsf 255, NewFPSCRReg
12847  MachineOperand SrcOp = MI.getOperand(1);
12848  MachineRegisterInfo &RegInfo = F->getRegInfo();
12849  Register OldFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12850 
12851  copyRegFromG8RCOrF8RC(OldFPSCRTmpReg, OldFPSCRReg);
12852 
12853  Register ImDefReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12854  Register ExtSrcReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12855 
12856  // The first operand of INSERT_SUBREG should be a register which has
12857  // subregisters, we only care about its RegClass, so we should use an
12858  // IMPLICIT_DEF register.
12859  BuildMI(*BB, MI, dl, TII->get(TargetOpcode::IMPLICIT_DEF), ImDefReg);
12860  BuildMI(*BB, MI, dl, TII->get(PPC::INSERT_SUBREG), ExtSrcReg)
12861  .addReg(ImDefReg)
12862  .add(SrcOp)
12863  .addImm(1);
12864 
12865  Register NewFPSCRTmpReg = RegInfo.createVirtualRegister(&PPC::G8RCRegClass);
12866  BuildMI(*BB, MI, dl, TII->get(PPC::RLDIMI), NewFPSCRTmpReg)
12867  .addReg(OldFPSCRTmpReg)
12868  .addReg(ExtSrcReg)
12869  .addImm(0)
12870  .addImm(62);
12871 
12872  Register NewFPSCRReg = RegInfo.createVirtualRegister(&PPC::F8RCRegClass);
12873  copyRegFromG8RCOrF8RC(NewFPSCRReg, NewFPSCRTmpReg);
12874 
12875  // The mask 255 means that put the 32:63 bits of NewFPSCRReg to the 32:63
12876  // bits of FPSCR.
12877  BuildMI(*BB, MI, dl, TII->get(PPC::MTFSF))
12878  .addImm(255)
12879  .addReg(NewFPSCRReg)
12880  .addImm(0)
12881  .addImm(0);
12882  } else if (MI.getOpcode() == PPC::SETFLM) {
12883  DebugLoc Dl = MI.getDebugLoc();
12884 
12885  // Result of setflm is previous FPSCR content, so we need to save it first.
12886  Register OldFPSCRReg = MI.getOperand(0).getReg();
12887  if (MRI.use_empty(OldFPSCRReg))
12888  BuildMI(*BB, MI, Dl, TII->get(TargetOpcode::IMPLICIT_DEF), OldFPSCRReg);
12889  else
12890  BuildMI(*BB, MI, Dl, TII->get(PPC::MFFS), OldFPSCRReg);
12891 
12892  // Put bits in 32:63 to FPSCR.
12893  Register NewFPSCRReg = MI.getOperand(1).getReg();
12894  BuildMI(*BB, MI, Dl, TII->get(PPC::MTFSF))
12895  .addImm(255)
12896  .addReg(NewFPSCRReg)
12897  .addImm(0)
12898  .addImm(0);
12899  } else if (MI.getOpcode() == PPC::PROBED_ALLOCA_32 ||
12900  MI.getOpcode() == PPC::PROBED_ALLOCA_64) {
12901  return emitProbedAlloca(MI, BB);
12902  } else if (MI.getOpcode() == PPC::SPLIT_QUADWORD) {
12903  DebugLoc DL = MI.getDebugLoc();
12904  Register Src = MI.getOperand(2).getReg();
12905  Register Lo = MI.getOperand(0).getReg();
12906  Register Hi = MI.getOperand(1).getReg();
12907  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
12908  .addDef(Lo)
12909  .addUse(Src, 0, PPC::sub_gp8_x1);
12910  BuildMI(*BB, MI, DL, TII->get(TargetOpcode::COPY))
12911  .addDef(Hi)
12912  .addUse(Src, 0, PPC::sub_gp8_x0);
12913  } else if (MI.getOpcode() == PPC::LQX_PSEUDO ||
12914  MI.getOpcode() == PPC::STQX_PSEUDO) {
12915  DebugLoc DL = MI.getDebugLoc();
12916  // Ptr is used as the ptr_rc_no_r0 part
12917  // of LQ/STQ's memory operand and adding result of RA and RB,
12918  // so it has to be g8rc_and_g8rc_nox0.
12919  Register Ptr =
12920  F->getRegInfo().createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
12921  Register Val = MI.getOperand(0).getReg();
12922  Register RA = MI.getOperand(1).getReg();
12923  Register RB = MI.getOperand(2).getReg();
12924  BuildMI(*BB, MI, DL, TII->get(PPC::ADD8), Ptr).addReg(RA).addReg(RB);
12925  BuildMI(*BB, MI, DL,
12926  MI.getOpcode() == PPC::LQX_PSEUDO ? TII->get(PPC::LQ)
12927  : TII->get(PPC::STQ))
12928  .addReg(Val, MI.getOpcode() == PPC::LQX_PSEUDO ? RegState::Define : 0)
12929  .addImm(0)
12930  .addReg(Ptr);
12931  } else {
12932  llvm_unreachable("Unexpected instr type to insert");
12933  }
12934 
12935  MI.eraseFromParent(); // The pseudo instruction is gone now.
12936  return BB;
12937 }
12938 
12939 //===----------------------------------------------------------------------===//
12940 // Target Optimization Hooks
12941 //===----------------------------------------------------------------------===//
12942 
12943 static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget) {
12944  // For the estimates, convergence is quadratic, so we essentially double the
12945  // number of digits correct after every iteration. For both FRE and FRSQRTE,
12946  // the minimum architected relative accuracy is 2^-5. When hasRecipPrec(),
12947  // this is 2^-14. IEEE float has 23 digits and double has 52 digits.
12948  int RefinementSteps = Subtarget.hasRecipPrec() ? 1 : 3;
12949  if (VT.getScalarType() == MVT::f64)
12950  RefinementSteps++;
12951  return RefinementSteps;
12952 }
12953 
// Build the PowerPC test that decides whether a software-sqrt input must take
// the special-case path: an FTSQRT machine node whose CR-field result has its
// EQ bit set for inputs ineligible for estimate refinement (see the comment
// block below); the EQ bit is extracted as an i1 via EXTRACT_SUBREG/sub_eq.
// NOTE(review): this extracted listing is missing two original source lines
// (12961 and 12965 in the embedded numbering) -- the early-return fallback of
// the guard below and the creation of the FTSQRT value used at the bottom.
// Confirm against the upstream file before modifying this function.
12954 SDValue PPCTargetLowering::getSqrtInputTest(SDValue Op, SelectionDAG &DAG,
12955  const DenormalMode &Mode) const {
12956  // We only have VSX Vector Test for software Square Root.
12957  EVT VT = Op.getValueType();
  // Guard: handle only f64 always, and v2f64/v4f32 when VSX is available,
  // and only when i1 is a legal type for the extracted condition bit.
12958  if (!isTypeLegal(MVT::i1) ||
12959  (VT != MVT::f64 &&
12960  ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX())))
12962 
12963  SDLoc DL(Op);
12964  // The output register of FTSQRT is CR field.
12966  // ftsqrt BF,FRB
12967  // Let e_b be the unbiased exponent of the double-precision
12968  // floating-point operand in register FRB.
12969  // fe_flag is set to 1 if either of the following conditions occurs.
12970  // - The double-precision floating-point operand in register FRB is a zero,
12971  // a NaN, or an infinity, or a negative value.
12972  // - e_b is less than or equal to -970.
12973  // Otherwise fe_flag is set to 0.
12974  // Both VSX and non-VSX versions would set EQ bit in the CR if the number is
12975  // not eligible for iteration. (zero/negative/infinity/nan or unbiased
12976  // exponent is less than -970)
  // Pull the EQ bit out of the CR-field result as the i1 test value.
12977  SDValue SRIdxVal = DAG.getTargetConstant(PPC::sub_eq, DL, MVT::i32);
12978  return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i1,
12979  FTSQRT, SRIdxVal),
12980  0);
12981 }
12982 
// Produce the sqrt result to use for a denormal input: delegate to a single
// hardware PPCISD::FSQRT node for the types this target supports (f64
// always; v2f64/v4f32 only with VSX).
// NOTE(review): original line 12990 in the embedded numbering (the early
// return for unsupported types) is missing from this extracted listing;
// confirm against the upstream file before modifying.
12983 SDValue
12984 PPCTargetLowering::getSqrtResultForDenormInput(SDValue Op,
12985  SelectionDAG &DAG) const {
12986  // We only have VSX Vector Square Root.
12987  EVT VT = Op.getValueType();
12988  if (VT != MVT::f64 &&
12989  ((VT != MVT::v2f64 && VT != MVT::v4f32) || !Subtarget.hasVSX()))
12991 
12992  return DAG.getNode(PPCISD::FSQRT, SDLoc(Op), VT, Op);
12993 }
12994 
12995 SDValue PPCTargetLowering::getSqrtEstimate(SDValue Operand, SelectionDAG &DAG,
12996  int Enabled, int &RefinementSteps,
12997  bool &UseOneConstNR,
12998  bool Reciprocal) const {
12999  EVT VT = Operand.getValueType();
13000  if ((VT == MVT::f32 && Subtarget.hasFRSQRTES()) ||
13001  (VT == MVT::f64 && Subtarget.hasFRSQRTE()) ||
13002  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13003  (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13004  if (RefinementSteps == ReciprocalEstimate::Unspecified)
13005  RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13006 
13007  // The Newton-Raphson computation with a single constant does not provide
13008  // enough accuracy on some CPUs.
13009  UseOneConstNR = !Subtarget.needsTwoConstNR();
13010  return DAG.getNode(PPCISD::FRSQRTE, SDLoc(Operand), VT, Operand);
13011  }
13012  return SDValue();
13013 }
13014 
13015 SDValue PPCTargetLowering::getRecipEstimate(SDValue Operand, SelectionDAG &DAG,
13016  int Enabled,
13017  int &RefinementSteps) const {
13018  EVT VT = Operand.getValueType();
13019  if ((VT == MVT::f32 && Subtarget.hasFRES()) ||
13020  (VT == MVT::f64 && Subtarget.hasFRE()) ||
13021  (VT == MVT::v4f32 && Subtarget.hasAltivec()) ||
13022  (VT == MVT::v2f64 && Subtarget.hasVSX())) {
13023  if (RefinementSteps == ReciprocalEstimate::Unspecified)
13024  RefinementSteps = getEstimateRefinementSteps(VT, Subtarget);
13025  return DAG.getNode(PPCISD::FRE, SDLoc(Operand), VT, Operand);
13026  }
13027  return SDValue();
13028 }
13029 
13030 unsigned PPCTargetLowering::combineRepeatedFPDivisors() const {
13031  // Note: This functionality is used only when unsafe-fp-math is enabled, and
13032  // on cores with reciprocal estimates (which are used when unsafe-fp-math is
13033  // enabled for division), this functionality is redundant with the default
13034  // combiner logic (once the division -> reciprocal/multiply transformation
13035  // has taken place). As a result, this matters more for older cores than for
13036  // newer ones.
13037 
13038  // Combine multiple FDIVs with the same divisor into multiple FMULs by the
13039  // reciprocal if there are two or more FDIVs (for embedded cores with only
13040  // one FP pipeline) for three or more FDIVs (for generic OOO cores).
13041  switch (Subtarget.getCPUDirective()) {
13042  default:
13043  return 3;
13044  case PPC::DIR_440:
13045  case PPC::DIR_A2:
13046  case PPC::DIR_E500:
13047  case PPC::DIR_E500mc:
13048  case PPC::DIR_E5500:
13049  return 2;
13050  }
13051 }
13052 
13053 // isConsecutiveLSLoc needs to work even if all adds have not yet been
13054 // collapsed, and so we need to look through chains of them.
13056  int64_t& Offset, SelectionDAG &DAG) {
13057  if (DAG.isBaseWithConstantOffset(Loc)) {
13058  Base = Loc.getOperand(0);
13059  Offset += cast<ConstantSDNode>(Loc.getOperand(1))->getSExtValue();
13060 
13061  // The base might itself be a base plus an offset, and if so, accumulate
13062  // that as well.
13064  }
13065 }
13066 
13068  unsigned Bytes, int Dist,
13069  SelectionDAG &DAG) {
13070  if (VT.getSizeInBits() / 8 != Bytes)
13071  return false;
13072 
13073  SDValue BaseLoc = Base->getBasePtr();
13074  if (Loc.getOpcode() == ISD::FrameIndex) {
13075  if (BaseLoc.getOpcode() != ISD::FrameIndex)
13076  return false;
13077  const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
13078  int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
13079  int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
13080  int FS = MFI.getObjectSize(FI);
13081  int BFS = MFI.getObjectSize(BFI);
13082  if (FS != BFS || FS != (int)Bytes) return false;
13083  return MFI.getObjectOffset(FI) == (MFI.getObjectOffset(BFI) + Dist*Bytes);
13084  }
13085 
13086  SDValue Base1 = Loc, Base2 = BaseLoc;
13087  int64_t Offset1 = 0, Offset2 = 0;
13088  getBaseWithConstantOffset(Loc, Base1, Offset1, DAG);
13089  getBaseWithConstantOffset(BaseLoc, Base2, Offset2, DAG);
13090  if (Base1 == Base2 && Offset1 == (Offset2 + Dist * Bytes))
13091  return true;
13092 
13093  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
13094  const GlobalValue *GV1 = nullptr;
13095  const GlobalValue *GV2 = nullptr;
13096  Offset1 = 0;
13097  Offset2 = 0;
13098  bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
13099  bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
13100  if (isGA1 && isGA2 && GV1 == GV2)
13101  return Offset1 == (Offset2 + Dist*Bytes);
13102  return false;
13103 }
13104 
13105 // Like SelectionDAG::isConsecutiveLoad, but also works for stores, and does
13106 // not enforce equality of the chain operands.
13108  unsigned Bytes, int Dist,
13109  SelectionDAG &DAG) {
13110  if (LSBaseSDNode *LS = dyn_cast<LSBaseSDNode>(N)) {
13111  EVT VT = LS->getMemoryVT();
13112  SDValue Loc = LS->getBasePtr();
13113  return isConsecutiveLSLoc(Loc, VT, Base, Bytes, Dist, DAG);
13114  }
13115 
13116  if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN) {
13117  EVT VT;
13118  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13119  default: return false;
13120  case Intrinsic::ppc_altivec_lvx:
13121  case Intrinsic::ppc_altivec_lvxl:
13122  case Intrinsic::ppc_vsx_lxvw4x:
13123  case Intrinsic::ppc_vsx_lxvw4x_be:
13124  VT = MVT::v4i32;
13125  break;
13126  case Intrinsic::ppc_vsx_lxvd2x:
13127  case Intrinsic::ppc_vsx_lxvd2x_be:
13128  VT = MVT::v2f64;
13129  break;
13130  case Intrinsic::ppc_altivec_lvebx:
13131  VT = MVT::i8;
13132  break;
13133  case Intrinsic::ppc_altivec_lvehx:
13134  VT = MVT::i16;
13135  break;
13136  case Intrinsic::ppc_altivec_lvewx:
13137  VT = MVT::i32;
13138  break;
13139  }
13140 
13141  return isConsecutiveLSLoc(N->getOperand(2), VT, Base, Bytes, Dist, DAG);
13142  }
13143 
13144  if (N->getOpcode() == ISD::INTRINSIC_VOID) {
13145  EVT VT;
13146  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
13147  default: return false;
13148  case Intrinsic::ppc_altivec_stvx:
13149  case Intrinsic::ppc_altivec_stvxl:
13150  case Intrinsic::ppc_vsx_stxvw4x:
13151  VT = MVT::v4i32;
13152  break;
13153  case Intrinsic::ppc_vsx_stxvd2x:
13154  VT = MVT::v2f64;
13155  break;
13156  case Intrinsic::ppc_vsx_stxvw4x_be:
13157  VT = MVT::v4i32;
13158  break;
13159  case Intrinsic::ppc_vsx_stxvd2x_be:
13160  VT = MVT::v2f64;
13161  break;
13162  case Intrinsic::ppc_altivec_stvebx:
13163  VT = MVT::i8;
13164  break;
13165  case Intrinsic::ppc_altivec_stvehx:
13166  VT = MVT::i16;
13167  break;
13168  case Intrinsic::ppc_altivec_stvewx:
13169  VT = MVT::i32;
13170  break;
13171  }
13172 
13173  return isConsecutiveLSLoc(N->getOperand(3), VT, Base, Bytes, Dist, DAG);
13174  }
13175 
13176  return false;
13177 }
13178 
13179 // Return true is there is a nearyby consecutive load to the one provided
13180 // (regardless of alignment). We search up and down the chain, looking though
13181 // token factors and other loads (but nothing else). As a result, a true result
13182 // indicates that it is safe to create a new consecutive load adjacent to the
13183 // load provided.
13185  SDValue Chain = LD->getChain();
13186  EVT VT = LD->getMemoryVT();
13187 
13188  SmallSet<SDNode *, 16> LoadRoots;
13189  SmallVector<SDNode *, 8> Queue(1, Chain.getNode());
13190  SmallSet<SDNode *, 16> Visited;
13191 
13192  // First, search up the chain, branching to follow all token-factor operands.
13193  // If we find a consecutive load, then we're done, otherwise, record all
13194  // nodes just above the top-level loads and token factors.
13195  while (!Queue.empty()) {
13196  SDNode *ChainNext = Queue.pop_back_val();
13197  if (!Visited.insert(ChainNext).second)
13198  continue;
13199 
13200  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(ChainNext)) {
13201  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13202  return true;
13203 
13204  if (!Visited.count(ChainLD->getChain().getNode()))
13205  Queue.push_back(ChainLD->getChain().getNode());
13206  } else if (ChainNext->getOpcode() == ISD::TokenFactor) {
13207  for (const SDUse &O : ChainNext->ops())
13208  if (!Visited.count(O.getNode()))
13209  Queue.push_back(O.getNode());
13210  } else
13211  LoadRoots.insert(ChainNext);
13212  }
13213 
13214  // Second, search down the chain, starting from the top-level nodes recorded
13215  // in the first phase. These top-level nodes are the nodes just above all
13216  // loads and token factors. Starting with their uses, recursively look though
13217  // all loads (just the chain uses) and token factors to find a consecutive
13218  // load.
13219  Visited.clear();
13220  Queue.clear();
13221 
13222  for (SmallSet<SDNode *, 16>::iterator I = LoadRoots.begin(),
13223  IE = LoadRoots.end(); I != IE; ++I) {
13224  Queue.push_back(*I);
13225 
13226  while (!Queue.empty()) {
13227  SDNode *LoadRoot = Queue.pop_back_val();
13228  if (!Visited.insert(LoadRoot).second)
13229  continue;
13230 
13231  if (MemSDNode *ChainLD = dyn_cast<MemSDNode>(LoadRoot))
13232  if (isConsecutiveLS(ChainLD, LD, VT.getStoreSize(), 1, DAG))
13233  return true;
13234 
13235  for (SDNode *U : LoadRoot->uses())
13236  if (((isa<MemSDNode>(U) &&
13237  cast<MemSDNode>(U)->getChain().getNode() == LoadRoot) ||
13238  U->getOpcode() == ISD::TokenFactor) &&
13239  !Visited.count(U))
13240  Queue.push_back(U);
13241  }
13242  }
13243 
13244  return false;
13245 }
13246 
13247 /// This function is called when we have proved that a SETCC node can be replaced
13248 /// by subtraction (and other supporting instructions) so that the result of
13249 /// comparison is kept in a GPR instead of CR. This function is purely for
13250 /// codegen purposes and has some flags to guide the codegen process.
13251 static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement,
13252  bool Swap, SDLoc &DL, SelectionDAG &DAG) {
13253  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13254 
13255  // Zero extend the operands to the largest legal integer. Originally, they
13256  // must be of a strictly smaller size.
13257  auto Op0 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(0),
13258  DAG.getConstant(Size, DL, MVT::i32));
13259  auto Op1 = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, N->getOperand(1),
13260  DAG.getConstant(Size, DL, MVT::i32));
13261 
13262  // Swap if needed. Depends on the condition code.
13263  if (Swap)
13264  std::swap(Op0, Op1);
13265 
13266  // Subtract extended integers.
13267  auto SubNode = DAG.getNode(ISD::SUB, DL, MVT::i64, Op0, Op1);
13268 
13269  // Move the sign bit to the least significant position and zero out the rest.
13270  // Now the least significant bit carries the result of original comparison.
13271  auto Shifted = DAG.getNode(ISD::SRL, DL, MVT::i64, SubNode,
13272  DAG.getConstant(Size - 1, DL, MVT::i32));
13273  auto Final = Shifted;
13274 
13275  // Complement the result if needed. Based on the condition code.
13276  if (Complement)
13277  Final = DAG.getNode(ISD::XOR, DL, MVT::i64, Shifted,
13278  DAG.getConstant(1, DL, MVT::i64));
13279 
13280  return DAG.getNode(ISD::TRUNCATE, DL, MVT::i1, Final);
13281 }
13282 
13283 SDValue PPCTargetLowering::ConvertSETCCToSubtract(SDNode *N,
13284  DAGCombinerInfo &DCI) const {
13285  assert(N->getOpcode() == ISD::SETCC && "ISD::SETCC Expected.");
13286 
13287  SelectionDAG &DAG = DCI.DAG;
13288  SDLoc DL(N);
13289 
13290  // Size of integers being compared has a critical role in the following
13291  // analysis, so we prefer to do this when all types are legal.
13292  if (!DCI.isAfterLegalizeDAG())
13293  return SDValue();
13294 
13295  // If all users of SETCC extend its value to a legal integer type
13296  // then we replace SETCC with a subtraction
13297  for (const SDNode *U : N->uses())
13298  if (U->getOpcode() != ISD::ZERO_EXTEND)
13299  return SDValue();
13300 
13301  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13302  auto OpSize = N->getOperand(0).getValueSizeInBits();
13303 
13305 
13306  if (OpSize < Size) {
13307  switch (CC) {
13308  default: break;
13309  case ISD::SETULT:
13310  return generateEquivalentSub(N, Size, false, false, DL, DAG);
13311  case ISD::SETULE:
13312  return generateEquivalentSub(N, Size, true, true, DL, DAG);
13313  case ISD::SETUGT:
13314  return generateEquivalentSub(N, Size, false, true, DL, DAG);
13315  case ISD::SETUGE:
13316  return generateEquivalentSub(N, Size, true, false, DL, DAG);
13317  }
13318  }
13319 
13320  return SDValue();
13321 }
13322 
// Combine a truncation-to-i1 (an actual TRUNCATE, or the implicit narrowing
// done by a SETCC/SELECT_CC) whose inputs are a self-contained cluster of
// bit operations fed by i1 extensions, by demoting the whole cluster to i1
// so the values stay in CR bits rather than being moved through GPRs.
// Returns the replacement value, or SDValue() when the pattern/safety
// conditions are not met. NOTE(review): this mutates the DAG in place via
// ReplaceAllUsesOfValueWith while iterating a handle list — statement order
// is load-bearing throughout.
SDValue PPCTargetLowering::DAGCombineTruncBoolExt(SDNode *N,
                                                  DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);

  assert(Subtarget.useCRBits() && "Expecting to be tracking CR bits");
  // If we're tracking CR bits, we need to be careful that we don't have:
  //   trunc(binary-ops(zext(x), zext(y)))
  // or
  //   trunc(binary-ops(binary-ops(zext(x), zext(y)), ...)
  // such that we're unnecessarily moving things into GPRs when it would be
  // better to keep them in CR bits.

  // Note that trunc here can be an actual i1 trunc, or can be the effective
  // truncation that comes from a setcc or select_cc.
  if (N->getOpcode() == ISD::TRUNCATE &&
      N->getValueType(0) != MVT::i1)
    return SDValue();

  if (N->getOperand(0).getValueType() != MVT::i32 &&
      N->getOperand(0).getValueType() != MVT::i64)
    return SDValue();

  if (N->getOpcode() == ISD::SETCC ||
      N->getOpcode() == ISD::SELECT_CC) {
    // If we're looking at a comparison, then we need to make sure that the
    // high bits (all except for the first) don't matter the result.
    // (SETCC keeps its condition code in operand 2, SELECT_CC in operand 4.)
    ISD::CondCode CC =
      cast<CondCodeSDNode>(N->getOperand(
        N->getOpcode() == ISD::SETCC ? 2 : 4))->get();
    unsigned OpBits = N->getOperand(0).getValueSizeInBits();

    if (ISD::isSignedIntSetCC(CC)) {
      // Signed compare: every bit must be a copy of the sign bit.
      if (DAG.ComputeNumSignBits(N->getOperand(0)) != OpBits ||
          DAG.ComputeNumSignBits(N->getOperand(1)) != OpBits)
        return SDValue();
    } else if (ISD::isUnsignedIntSetCC(CC)) {
      // Unsigned compare: all bits above bit 0 must be known zero; if not,
      // a SETCC may still be convertible to a subtraction-based sequence.
      if (!DAG.MaskedValueIsZero(N->getOperand(0),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)) ||
          !DAG.MaskedValueIsZero(N->getOperand(1),
                                 APInt::getHighBitsSet(OpBits, OpBits-1)))
        return (N->getOpcode() == ISD::SETCC ? ConvertSETCCToSubtract(N, DCI)
                                             : SDValue());
    } else {
      // This is neither a signed nor an unsigned comparison, just make sure
      // that the high bits are equal.
      KnownBits Op1Known = DAG.computeKnownBits(N->getOperand(0));
      KnownBits Op2Known = DAG.computeKnownBits(N->getOperand(1));

      // We don't really care about what is known about the first bit (if
      // anything), so pretend that it is known zero for both to ensure they can
      // be compared as constants.
      Op1Known.Zero.setBit(0); Op1Known.One.clearBit(0);
      Op2Known.Zero.setBit(0); Op2Known.One.clearBit(0);

      if (!Op1Known.isConstant() || !Op2Known.isConstant() ||
          Op1Known.getConstant() != Op2Known.getConstant())
        return SDValue();
    }
  }

  // We now know that the higher-order bits are irrelevant, we just need to
  // make sure that all of the intermediate operations are bit operations, and
  // all inputs are extensions.
  if (N->getOperand(0).getOpcode() != ISD::AND &&
      N->getOperand(0).getOpcode() != ISD::OR &&
      N->getOperand(0).getOpcode() != ISD::XOR &&
      N->getOperand(0).getOpcode() != ISD::SELECT &&
      N->getOperand(0).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(0).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(0).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(0).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // A second operand exists only for SETCC/SELECT_CC; it must satisfy the
  // same restriction.
  if ((N->getOpcode() == ISD::SETCC || N->getOpcode() == ISD::SELECT_CC) &&
      N->getOperand(1).getOpcode() != ISD::AND &&
      N->getOperand(1).getOpcode() != ISD::OR &&
      N->getOperand(1).getOpcode() != ISD::XOR &&
      N->getOperand(1).getOpcode() != ISD::SELECT &&
      N->getOperand(1).getOpcode() != ISD::SELECT_CC &&
      N->getOperand(1).getOpcode() != ISD::TRUNCATE &&
      N->getOperand(1).getOpcode() != ISD::SIGN_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ZERO_EXTEND &&
      N->getOperand(1).getOpcode() != ISD::ANY_EXTEND)
    return SDValue();

  // Inputs: i1 extensions or constants feeding the cluster.
  // BinOps: worklist of bit operations still to visit.
  // PromOps: every operation in the cluster that will be demoted to i1.
  SmallVector<SDValue, 4> Inputs;
  SmallVector<SDValue, 8> BinOps, PromOps;
  SmallPtrSet<SDNode *, 16> Visited;

  // Seed the worklist from N's operand(s); a TRUNCATE has only one.
  for (unsigned i = 0; i < 2; ++i) {
    if (((N->getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
          N->getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
          N->getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
        isa<ConstantSDNode>(N->getOperand(i)))
      Inputs.push_back(N->getOperand(i));
    else
      BinOps.push_back(N->getOperand(i));

    if (N->getOpcode() == ISD::TRUNCATE)
      break;
  }

  // Visit all inputs, collect all binary operations (and, or, xor and
  // select) that are all fed by extensions.
  while (!BinOps.empty()) {
    SDValue BinOp = BinOps.pop_back_val();

    if (!Visited.insert(BinOp.getNode()).second)
      continue;

    PromOps.push_back(BinOp);

    for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
      // The condition of the select is not promoted.
      if (BinOp.getOpcode() == ISD::SELECT && i == 0)
        continue;
      if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
        continue;

      if (((BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
            BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) &&
            BinOp.getOperand(i).getOperand(0).getValueType() == MVT::i1) ||
          isa<ConstantSDNode>(BinOp.getOperand(i))) {
        Inputs.push_back(BinOp.getOperand(i));
      } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
                 BinOp.getOperand(i).getOpcode() == ISD::OR ||
                 BinOp.getOperand(i).getOpcode() == ISD::XOR ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
                 BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC ||
                 BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
                 BinOp.getOperand(i).getOpcode() == ISD::SIGN_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ZERO_EXTEND ||
                 BinOp.getOperand(i).getOpcode() == ISD::ANY_EXTEND) {
        BinOps.push_back(BinOp.getOperand(i));
      } else {
        // We have an input that is not an extension or another binary
        // operation; we'll abort this transformation.
        return SDValue();
      }
    }
  }

  // Make sure that this is a self-contained cluster of operations (which
  // is not quite the same thing as saying that everything has only one
  // use).
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;

    for (const SDNode *User : Inputs[i].getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == Inputs[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == Inputs[i] ||
            User->getOperand(1) == Inputs[i])
          return SDValue();
      }
    }
  }

  // Same containment check for the to-be-promoted operations themselves.
  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
    for (const SDNode *User : PromOps[i].getNode()->uses()) {
      if (User != N && !Visited.count(User))
        return SDValue();

      // Make sure that we're not going to promote the non-output-value
      // operand(s) or SELECT or SELECT_CC.
      // FIXME: Although we could sometimes handle this, and it does occur in
      // practice that one of the condition inputs to the select is also one of
      // the outputs, we currently can't deal with this.
      if (User->getOpcode() == ISD::SELECT) {
        if (User->getOperand(0) == PromOps[i])
          return SDValue();
      } else if (User->getOpcode() == ISD::SELECT_CC) {
        if (User->getOperand(0) == PromOps[i] ||
            User->getOperand(1) == PromOps[i])
          return SDValue();
      }
    }
  }

  // Replace all inputs with the extension operand.
  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
    // Constants may have users outside the cluster of to-be-promoted nodes,
    // and so we need to replace those as we do the promotions.
    if (isa<ConstantSDNode>(Inputs[i]))
      continue;
    else
      DAG.ReplaceAllUsesOfValueWith(Inputs[i], Inputs[i].getOperand(0));
  }

  // Hold the cluster members in handles so RAUW updates below don't leave
  // dangling SDValues in our worklist.
  std::list<HandleSDNode> PromOpHandles;
  for (auto &PromOp : PromOps)
    PromOpHandles.emplace_back(PromOp);

  // Replace all operations (these are all the same, but have a different
  // (i1) return type). DAG.getNode will validate that the types of
  // a binary operator match, so go through the list in reverse so that
  // we've likely promoted both operands first. Any intermediate truncations or
  // extensions disappear.
  while (!PromOpHandles.empty()) {
    SDValue PromOp = PromOpHandles.back().getValue();
    PromOpHandles.pop_back();

    if (PromOp.getOpcode() == ISD::TRUNCATE ||
        PromOp.getOpcode() == ISD::SIGN_EXTEND ||
        PromOp.getOpcode() == ISD::ZERO_EXTEND ||
        PromOp.getOpcode() == ISD::ANY_EXTEND) {
      if (!isa<ConstantSDNode>(PromOp.getOperand(0)) &&
          PromOp.getOperand(0).getValueType() != MVT::i1) {
        // The operand is not yet ready (see comment below).
        PromOpHandles.emplace_front(PromOp);
        continue;
      }

      SDValue RepValue = PromOp.getOperand(0);
      if (isa<ConstantSDNode>(RepValue))
        RepValue = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, RepValue);

      DAG.ReplaceAllUsesOfValueWith(PromOp, RepValue);
      continue;
    }

    // C is the index of the first value operand that gets promoted
    // (conditions of SELECT/SELECT_CC are skipped).
    unsigned C;
    switch (PromOp.getOpcode()) {
    default: C = 0; break;
    case ISD::SELECT: C = 1; break;
    case ISD::SELECT_CC: C = 2; break;
    }

    if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
         PromOp.getOperand(C).getValueType() != MVT::i1) ||
        (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
         PromOp.getOperand(C+1).getValueType() != MVT::i1)) {
      // The to-be-promoted operands of this node have not yet been
      // promoted (this should be rare because we're going through the
      // list backward, but if one of the operands has several users in
      // this cluster of to-be-promoted nodes, it is possible).
      PromOpHandles.emplace_front(PromOp);
      continue;
    }

    SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
                                PromOp.getNode()->op_end());

    // If there are any constant inputs, make sure they're replaced now.
    for (unsigned i = 0; i < 2; ++i)
      if (isa<ConstantSDNode>(Ops[C+i]))
        Ops[C+i] = DAG.getNode(ISD::TRUNCATE, dl, MVT::i1, Ops[C+i]);

    DAG.ReplaceAllUsesOfValueWith(PromOp,
      DAG.getNode(PromOp.getOpcode(), dl, MVT::i1, Ops));
  }

  // Now we're left with the initial truncation itself.
  if (N->getOpcode() == ISD::TRUNCATE)
    return N->getOperand(0);

  // Otherwise, this is a comparison. The operands to be compared have just
  // changed type (to i1), but everything else is the same.
  return SDValue(N, 0);
}
13598 
13599 SDValue PPCTargetLowering::DAGCombineExtBoolTrunc(SDNode *N,
13600  DAGCombinerInfo &DCI) const {
13601  SelectionDAG &DAG = DCI.DAG;
13602  SDLoc dl(N);
13603 
13604  // If we're tracking CR bits, we need to be careful that we don't have:
13605  // zext(binary-ops(trunc(x), trunc(y)))
13606  // or
13607  // zext(binary-ops(binary-ops(trunc(x), trunc(y)), ...)
13608  // such that we're unnecessarily moving things into CR bits that can more
13609  // efficiently stay in GPRs. Note that if we're not certain that the high
13610  // bits are set as required by the final extension, we still may need to do
13611  // some masking to get the proper behavior.
13612 
13613  // This same functionality is important on PPC64 when dealing with
13614  // 32-to-64-bit extensions; these occur often when 32-bit values are used as
13615  // the return values of functions. Because it is so similar, it is handled
13616  // here as well.
13617 
13618  if (N->getValueType(0) != MVT::i32 &&
13619  N->getValueType(0) != MVT::i64)
13620  return SDValue();
13621 
13622  if (!((N->getOperand(0).getValueType() == MVT::i1 && Subtarget.useCRBits()) ||
13623  (N->getOperand(0).getValueType() == MVT::i32 && Subtarget.isPPC64())))
13624  return SDValue();
13625 
13626  if (N->getOperand(0).getOpcode() != ISD::AND &&
13627  N->getOperand(0).getOpcode() != ISD::OR &&
13628  N->getOperand(0).getOpcode() != ISD::XOR &&
13629  N->getOperand(0).getOpcode() != ISD::SELECT &&
13630  N->getOperand(0).getOpcode() != ISD::SELECT_CC)
13631  return SDValue();
13632 
13633  SmallVector<SDValue, 4> Inputs;
13634  SmallVector<SDValue, 8> BinOps(1, N->getOperand(0)), PromOps;
13635  SmallPtrSet<SDNode *, 16> Visited;
13636 
13637  // Visit all inputs, collect all binary operations (and, or, xor and
13638  // select) that are all fed by truncations.
13639  while (!BinOps.empty()) {
13640  SDValue BinOp = BinOps.pop_back_val();
13641 
13642  if (!Visited.insert(BinOp.getNode()).second)
13643  continue;
13644 
13645  PromOps.push_back(BinOp);
13646 
13647  for (unsigned i = 0, ie = BinOp.getNumOperands(); i != ie; ++i) {
13648  // The condition of the select is not promoted.
13649  if (BinOp.getOpcode() == ISD::SELECT && i == 0)
13650  continue;
13651  if (BinOp.getOpcode() == ISD::SELECT_CC && i != 2 && i != 3)
13652  continue;
13653 
13654  if (BinOp.getOperand(i).getOpcode() == ISD::TRUNCATE ||
13655  isa<ConstantSDNode>(BinOp.getOperand(i))) {
13656  Inputs.push_back(BinOp.getOperand(i));
13657  } else if (BinOp.getOperand(i).getOpcode() == ISD::AND ||
13658  BinOp.getOperand(i).getOpcode() == ISD::OR ||
13659  BinOp.getOperand(i).getOpcode() == ISD::XOR ||
13660  BinOp.getOperand(i).getOpcode() == ISD::SELECT ||
13661  BinOp.getOperand(i).getOpcode() == ISD::SELECT_CC) {
13662  BinOps.push_back(BinOp.getOperand(i));
13663  } else {
13664  // We have an input that is not a truncation or another binary
13665  // operation; we'll abort this transformation.
13666  return SDValue();
13667  }
13668  }
13669  }
13670 
13671  // The operands of a select that must be truncated when the select is
13672  // promoted because the operand is actually part of the to-be-promoted set.
13673  DenseMap<SDNode *, EVT> SelectTruncOp[2];
13674 
13675  // Make sure that this is a self-contained cluster of operations (which
13676  // is not quite the same thing as saying that everything has only one
13677  // use).
13678  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13679  if (isa<ConstantSDNode>(Inputs[i]))
13680  continue;
13681 
13682  for (SDNode *User : Inputs[i].getNode()->uses()) {
13683  if (User != N && !Visited.count(User))
13684  return SDValue();
13685 
13686  // If we're going to promote the non-output-value operand(s) or SELECT or
13687  // SELECT_CC, record them for truncation.
13688  if (User->getOpcode() == ISD::SELECT) {
13689  if (User->getOperand(0) == Inputs[i])
13690  SelectTruncOp[0].insert(std::make_pair(User,
13691  User->getOperand(0).getValueType()));
13692  } else if (User->getOpcode() == ISD::SELECT_CC) {
13693  if (User->getOperand(0) == Inputs[i])
13694  SelectTruncOp[0].insert(std::make_pair(User,
13695  User->getOperand(0).getValueType()));
13696  if (User->getOperand(1) == Inputs[i])
13697  SelectTruncOp[1].insert(std::make_pair(User,
13698  User->getOperand(1).getValueType()));
13699  }
13700  }
13701  }
13702 
13703  for (unsigned i = 0, ie = PromOps.size(); i != ie; ++i) {
13704  for (SDNode *User : PromOps[i].getNode()->uses()) {
13705  if (User != N && !Visited.count(User))
13706  return SDValue();
13707 
13708  // If we're going to promote the non-output-value operand(s) or SELECT or
13709  // SELECT_CC, record them for truncation.
13710  if (User->getOpcode() == ISD::SELECT) {
13711  if (User->getOperand(0) == PromOps[i])
13712  SelectTruncOp[0].insert(std::make_pair(User,
13713  User->getOperand(0).getValueType()));
13714  } else if (User->getOpcode() == ISD::SELECT_CC) {
13715  if (User->getOperand(0) == PromOps[i])
13716  SelectTruncOp[0].insert(std::make_pair(User,
13717  User->getOperand(0).getValueType()));
13718  if (User->getOperand(1) == PromOps[i])
13719  SelectTruncOp[1].insert(std::make_pair(User,
13720  User->getOperand(1).getValueType()));
13721  }
13722  }
13723  }
13724 
13725  unsigned PromBits = N->getOperand(0).getValueSizeInBits();
13726  bool ReallyNeedsExt = false;
13727  if (N->getOpcode() != ISD::ANY_EXTEND) {
13728  // If all of the inputs are not already sign/zero extended, then
13729  // we'll still need to do that at the end.
13730  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13731  if (isa<ConstantSDNode>(Inputs[i]))
13732  continue;
13733 
13734  unsigned OpBits =
13735  Inputs[i].getOperand(0).getValueSizeInBits();
13736  assert(PromBits < OpBits && "Truncation not to a smaller bit count?");
13737 
13738  if ((N->getOpcode() == ISD::ZERO_EXTEND &&
13739  !DAG.MaskedValueIsZero(Inputs[i].getOperand(0),
13740  APInt::getHighBitsSet(OpBits,
13741  OpBits-PromBits))) ||
13742  (N->getOpcode() == ISD::SIGN_EXTEND &&
13743  DAG.ComputeNumSignBits(Inputs[i].getOperand(0)) <
13744  (OpBits-(PromBits-1)))) {
13745  ReallyNeedsExt = true;
13746  break;
13747  }
13748  }
13749  }
13750 
13751  // Replace all inputs, either with the truncation operand, or a
13752  // truncation or extension to the final output type.
13753  for (unsigned i = 0, ie = Inputs.size(); i != ie; ++i) {
13754  // Constant inputs need to be replaced with the to-be-promoted nodes that
13755  // use them because they might have users outside of the cluster of
13756  // promoted nodes.
13757  if (isa<ConstantSDNode>(Inputs[i]))
13758  continue;
13759 
13760  SDValue InSrc = Inputs[i].getOperand(0);
13761  if (Inputs[i].getValueType() == N->getValueType(0))
13762  DAG.ReplaceAllUsesOfValueWith(Inputs[i], InSrc);
13763  else if (N->getOpcode() == ISD::SIGN_EXTEND)
13764  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13765  DAG.getSExtOrTrunc(InSrc, dl, N->getValueType(0)));
13766  else if (N->getOpcode() == ISD::ZERO_EXTEND)
13767  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13768  DAG.getZExtOrTrunc(InSrc, dl, N->getValueType(0)));
13769  else
13770  DAG.ReplaceAllUsesOfValueWith(Inputs[i],
13771  DAG.getAnyExtOrTrunc(InSrc, dl, N->getValueType(0)));
13772  }
13773 
13774  std::list<HandleSDNode> PromOpHandles;
13775  for (auto &PromOp : PromOps)
13776  PromOpHandles.emplace_back(PromOp);
13777 
13778  // Replace all operations (these are all the same, but have a different
13779  // (promoted) return type). DAG.getNode will validate that the types of
13780  // a binary operator match, so go through the list in reverse so that
13781  // we've likely promoted both operands first.
13782  while (!PromOpHandles.empty()) {
13783  SDValue PromOp = PromOpHandles.back().getValue();
13784  PromOpHandles.pop_back();
13785 
13786  unsigned C;
13787  switch (PromOp.getOpcode()) {
13788  default: C = 0; break;
13789  case ISD::SELECT: C = 1; break;
13790  case ISD::SELECT_CC: C = 2; break;
13791  }
13792 
13793  if ((!isa<ConstantSDNode>(PromOp.getOperand(C)) &&
13794  PromOp.getOperand(C).getValueType() != N->getValueType(0)) ||
13795  (!isa<ConstantSDNode>(PromOp.getOperand(C+1)) &&
13796  PromOp.getOperand(C+1).getValueType() != N->getValueType(0))) {
13797  // The to-be-promoted operands of this node have not yet been
13798  // promoted (this should be rare because we're going through the
13799  // list backward, but if one of the operands has several users in
13800  // this cluster of to-be-promoted nodes, it is possible).
13801  PromOpHandles.emplace_front(PromOp);
13802  continue;
13803  }
13804 
13805  // For SELECT and SELECT_CC nodes, we do a similar check for any
13806  // to-be-promoted comparison inputs.
13807  if (PromOp.getOpcode() == ISD::SELECT ||
13808  PromOp.getOpcode() == ISD::SELECT_CC) {
13809  if ((SelectTruncOp[0].count(PromOp.getNode()) &&
13810  PromOp.getOperand(0).getValueType() != N->getValueType(0)) ||
13811  (SelectTruncOp[1].count(PromOp.getNode()) &&
13812  PromOp.getOperand(1).getValueType() != N->getValueType(0))) {
13813  PromOpHandles.emplace_front(PromOp);
13814  continue;
13815  }
13816  }
13817 
13818  SmallVector<SDValue, 3> Ops(PromOp.getNode()->op_begin(),
13819  PromOp.getNode()->op_end());
13820 
13821  // If this node has constant inputs, then they'll need to be promoted here.
13822  for (unsigned i = 0; i < 2; ++i) {
13823  if (!isa<ConstantSDNode>(Ops[C+i]))
13824  continue;
13825  if (Ops[C+i].getValueType() == N->getValueType(0))
13826  continue;
13827 
13828  if (N->getOpcode() == ISD::SIGN_EXTEND)
13829  Ops[C+i] = DAG.getSExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13830  else if (N->getOpcode() == ISD::ZERO_EXTEND)
13831  Ops[C+i] = DAG.getZExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13832  else
13833  Ops[C+i] = DAG.getAnyExtOrTrunc(Ops[C+i], dl, N->getValueType(0));
13834  }
13835 
13836  // If we've promoted the comparison inputs of a SELECT or SELECT_CC,
13837  // truncate them again to the original value type.
13838  if (PromOp.getOpcode() == ISD::SELECT ||
13839  PromOp.getOpcode() == ISD::SELECT_CC) {
13840  auto SI0 = SelectTruncOp[0].find(PromOp.getNode());
13841  if (SI0 != SelectTruncOp[0].end())
13842  Ops[0] = DAG.getNode(ISD::TRUNCATE, dl, SI0->second, Ops[0]);
13843  auto SI1 = SelectTruncOp[1].find(PromOp.getNode());
13844  if (SI1 != SelectTruncOp[1].end())
13845  Ops[1] = DAG.getNode(ISD::TRUNCATE, dl, SI1->second, Ops[1]);
13846  }
13847 
13848  DAG.ReplaceAllUsesOfValueWith(PromOp,
13849  DAG.getNode(PromOp.getOpcode(), dl, N->getValueType(0), Ops));
13850  }
13851 
13852  // Now we're left with the initial extension itself.
13853  if (!ReallyNeedsExt)
13854  return N->getOperand(0);
13855 
13856  // To zero extend, just mask off everything except for the first bit (in the
13857  // i1 case).
13858  if (N->getOpcode() == ISD::ZERO_EXTEND)
13859  return DAG.getNode(ISD::AND, dl, N->getValueType(0), N->getOperand(0),
13861  N->getValueSizeInBits(0), PromBits),
13862  dl, N->getValueType(0)));
13863 
13864  assert(N->getOpcode() == ISD::SIGN_EXTEND &&
13865  "Invalid extension type");
13866  EVT ShiftAmountTy = getShiftAmountTy(N->getValueType(0), DAG.getDataLayout());
13867  SDValue ShiftCst =
13868  DAG.getConstant(N->getValueSizeInBits(0) - PromBits, dl, ShiftAmountTy);
13869  return DAG.getNode(
13870  ISD::SRA, dl, N->getValueType(0),
13871  DAG.getNode(ISD::SHL, dl, N->getValueType(0), N->getOperand(0), ShiftCst),
13872  ShiftCst);
13873 }
13874 
13875 SDValue PPCTargetLowering::combineSetCC(SDNode *N,
13876  DAGCombinerInfo &DCI) const {
13877  assert(N->getOpcode() == ISD::SETCC &&
13878  "Should be called with a SETCC node");
13879 
13880  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
13881  if (CC == ISD::SETNE || CC == ISD::SETEQ) {
13882  SDValue LHS = N->getOperand(0);
13883  SDValue RHS = N->getOperand(1);
13884 
13885  // If there is a '0 - y' pattern, canonicalize the pattern to the RHS.
13886  if (LHS.getOpcode() == ISD::SUB && isNullConstant(LHS.getOperand(0)) &&
13887  LHS.hasOneUse())
13888  std::swap(LHS, RHS);
13889 
13890  // x == 0-y --> x+y == 0
13891  // x != 0-y --> x+y != 0
13892  if (RHS.getOpcode() == ISD::SUB && isNullConstant(RHS.getOperand(0)) &&
13893  RHS.hasOneUse()) {
13894  SDLoc DL(N);
13895  SelectionDAG &DAG = DCI.DAG;
13896  EVT VT = N->getValueType(0);
13897  EVT OpVT = LHS.getValueType();
13898  SDValue Add = DAG.getNode(ISD::ADD, DL, OpVT, LHS, RHS.getOperand(1));
13899  return DAG.getSetCC(DL, VT, Add, DAG.getConstant(0, DL, OpVT), CC);
13900  }
13901  }
13902 
13903  return DAGCombineTruncBoolExt(N, DCI);
13904 }
13905 
13906 // Is this an extending load from an f32 to an f64?
13907 static bool isFPExtLoad(SDValue Op) {
13908  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op.getNode()))
13909  return LD->getExtensionType() == ISD::EXTLOAD &&
13910  Op.getValueType() == MVT::f64;
13911  return false;
13912 }
13913 
13914 /// Reduces the number of fp-to-int conversion when building a vector.
13915 ///
13916 /// If this vector is built out of floating to integer conversions,
13917 /// transform it to a vector built out of floating point values followed by a
13918 /// single floating to integer conversion of the vector.
13919 /// Namely (build_vector (fptosi $A), (fptosi $B), ...)
13920 /// becomes (fptosi (build_vector ($A, $B, ...)))
13921 SDValue PPCTargetLowering::
13922 combineElementTruncationToVectorTruncation(SDNode *N,
13923  DAGCombinerInfo &DCI) const {
13924  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
13925  "Should be called with a BUILD_VECTOR node");
13926 
13927  SelectionDAG &DAG = DCI.DAG;
13928  SDLoc dl(N);
13929 
13930  SDValue FirstInput = N->getOperand(0);
13931  assert(FirstInput.getOpcode() == PPCISD::MFVSR &&
13932  "The input operand must be an fp-to-int conversion.");
13933 
13934  // This combine happens after legalization so the fp_to_[su]i nodes are
13935  // already converted to PPCSISD nodes.
13936  unsigned FirstConversion = FirstInput.getOperand(0).getOpcode();
13937  if (FirstConversion == PPCISD::FCTIDZ ||
13938  FirstConversion == PPCISD::FCTIDUZ ||
13939  FirstConversion == PPCISD::FCTIWZ ||
13940  FirstConversion == PPCISD::FCTIWUZ) {
13941  bool IsSplat = true;
13942  bool Is32Bit = FirstConversion == PPCISD::FCTIWZ ||
13943  FirstConversion == PPCISD::FCTIWUZ;
13944  EVT SrcVT = FirstInput.getOperand(0).getValueType();
13946  EVT TargetVT = N->getValueType(0);
13947  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13948  SDValue NextOp = N->getOperand(i);
13949  if (NextOp.getOpcode() != PPCISD::MFVSR)
13950  return SDValue();
13951  unsigned NextConversion = NextOp.getOperand(0).getOpcode();
13952  if (NextConversion != FirstConversion)
13953  return SDValue();
13954  // If we are converting to 32-bit integers, we need to add an FP_ROUND.
13955  // This is not valid if the input was originally double precision. It is
13956  // also not profitable to do unless this is an extending load in which
13957  // case doing this combine will allow us to combine consecutive loads.
13958  if (Is32Bit && !isFPExtLoad(NextOp.getOperand(0).getOperand(0)))
13959  return SDValue();
13960  if (N->getOperand(i) != FirstInput)
13961  IsSplat = false;
13962  }
13963 
13964  // If this is a splat, we leave it as-is since there will be only a single
13965  // fp-to-int conversion followed by a splat of the integer. This is better
13966  // for 32-bit and smaller ints and neutral for 64-bit ints.
13967  if (IsSplat)
13968  return SDValue();
13969 
13970  // Now that we know we have the right type of node, get its operands
13971  for (int i = 0, e = N->getNumOperands(); i < e; ++i) {
13972  SDValue In = N->getOperand(i).getOperand(0);
13973  if (Is32Bit) {
13974  // For 32-bit values, we need to add an FP_ROUND node (if we made it
13975  // here, we know that all inputs are extending loads so this is safe).
13976  if (In.isUndef())
13977  Ops.push_back(DAG.getUNDEF(SrcVT));
13978  else {
13979  SDValue Trunc = DAG.getNode(ISD::FP_ROUND, dl,
13980  MVT::f32, In.getOperand(0),
13981  DAG.getIntPtrConstant(1, dl));
13982  Ops.push_back(Trunc);
13983  }
13984  } else
13985  Ops.push_back(In.isUndef() ? DAG.getUNDEF(SrcVT) : In.getOperand(0));
13986  }
13987 
13988  unsigned Opcode;
13989  if (FirstConversion == PPCISD::FCTIDZ ||
13990  FirstConversion == PPCISD::FCTIWZ)
13991  Opcode = ISD::FP_TO_SINT;
13992  else
13993  Opcode = ISD::FP_TO_UINT;
13994 
13995  EVT NewVT = TargetVT == MVT::v2i64 ? MVT::v2f64 : MVT::v4f32;
13996  SDValue BV = DAG.getBuildVector(NewVT, dl, Ops);
13997  return DAG.getNode(Opcode, dl, TargetVT, BV);
13998  }
13999  return SDValue();
14000 }
14001 
14002 /// Reduce the number of loads when building a vector.
14003 ///
14004 /// Building a vector out of multiple loads can be converted to a load
14005 /// of the vector type if the loads are consecutive. If the loads are
14006 /// consecutive but in descending order, a shuffle is added at the end
14007 /// to reorder the vector.
14009  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14010  "Should be called with a BUILD_VECTOR node");
14011 
14012  SDLoc dl(N);
14013 
14014  // Return early for non byte-sized type, as they can't be consecutive.
14015  if (!N->getValueType(0).getVectorElementType().isByteSized())
14016  return SDValue();
14017 
14018  bool InputsAreConsecutiveLoads = true;
14019  bool InputsAreReverseConsecutive = true;
14020  unsigned ElemSize = N->getValueType(0).getScalarType().getStoreSize();
14021  SDValue FirstInput = N->getOperand(0);
14022  bool IsRoundOfExtLoad = false;
14023 
14024  if (FirstInput.getOpcode() == ISD::FP_ROUND &&
14025  FirstInput.getOperand(0).getOpcode() == ISD::LOAD) {
14026  LoadSDNode *LD = dyn_cast<LoadSDNode>(FirstInput.getOperand(0));
14027  IsRoundOfExtLoad = LD->getExtensionType() == ISD::EXTLOAD;
14028  }
14029  // Not a build vector of (possibly fp_rounded) loads.
14030  if ((!IsRoundOfExtLoad && FirstInput.getOpcode() != ISD::LOAD) ||
14031  N->getNumOperands() == 1)
14032  return SDValue();
14033 
14034  for (int i = 1, e = N->getNumOperands(); i < e; ++i) {
14035  // If any inputs are fp_round(extload), they all must be.
14036  if (IsRoundOfExtLoad && N->getOperand(i).getOpcode() != ISD::FP_ROUND)
14037  return SDValue();
14038 
14039  SDValue NextInput = IsRoundOfExtLoad ? N->getOperand(i).getOperand(0) :
14040  N->getOperand(i);
14041  if (NextInput.getOpcode() != ISD::LOAD)
14042  return SDValue();
14043 
14044  SDValue PreviousInput =
14045  IsRoundOfExtLoad ? N->getOperand(i-1).getOperand(0) : N->getOperand(i-1);
14046  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(PreviousInput);
14047  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(NextInput);
14048 
14049  // If any inputs are fp_round(extload), they all must be.
14050  if (IsRoundOfExtLoad && LD2->getExtensionType() != ISD::EXTLOAD)
14051  return SDValue();
14052 
14053  if (!isConsecutiveLS(LD2, LD1, ElemSize, 1, DAG))
14054  InputsAreConsecutiveLoads = false;
14055  if (!isConsecutiveLS(LD1, LD2, ElemSize, 1, DAG))
14056  InputsAreReverseConsecutive = false;
14057 
14058  // Exit early if the loads are neither consecutive nor reverse consecutive.
14059  if (!InputsAreConsecutiveLoads && !InputsAreReverseConsecutive)
14060  return SDValue();
14061  }
14062 
14063  assert(!(InputsAreConsecutiveLoads && InputsAreReverseConsecutive) &&
14064  "The loads cannot be both consecutive and reverse consecutive.");
14065 
14066  SDValue FirstLoadOp =
14067  IsRoundOfExtLoad ? FirstInput.getOperand(0) : FirstInput;
14068  SDValue LastLoadOp =
14069  IsRoundOfExtLoad ? N->getOperand(N->getNumOperands()-1).getOperand(0) :
14070  N->getOperand(N->getNumOperands()-1);
14071 
14072  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(FirstLoadOp);
14073  LoadSDNode *LDL = dyn_cast<LoadSDNode>(LastLoadOp);
14074  if (InputsAreConsecutiveLoads) {
14075  assert(LD1 && "Input needs to be a LoadSDNode.");
14076  return DAG.getLoad(N->getValueType(0), dl, LD1->getChain(),
14077  LD1->getBasePtr(), LD1->getPointerInfo(),
14078  LD1->getAlignment());
14079  }
14080  if (InputsAreReverseConsecutive) {
14081  assert(LDL && "Input needs to be a LoadSDNode.");
14082  SDValue Load = DAG.getLoad(N->getValueType(0), dl, LDL->getChain(),
14083  LDL->getBasePtr(), LDL->getPointerInfo(),
14084  LDL->getAlignment());
14086  for (int i = N->getNumOperands() - 1; i >= 0; i--)
14087  Ops.push_back(i);
14088 
14089  return DAG.getVectorShuffle(N->getValueType(0), dl, Load,
14090  DAG.getUNDEF(N->getValueType(0)), Ops);
14091  }
14092  return SDValue();
14093 }
14094 
14095 // This function adds the required vector_shuffle needed to get
14096 // the elements of the vector extract in the correct position
14097 // as specified by the CorrectElems encoding.
14099  SDValue Input, uint64_t Elems,
14100  uint64_t CorrectElems) {
14101  SDLoc dl(N);
14102 
14103  unsigned NumElems = Input.getValueType().getVectorNumElements();
14104  SmallVector<int, 16> ShuffleMask(NumElems, -1);
14105 
14106  // Knowing the element indices being extracted from the original
14107  // vector and the order in which they're being inserted, just put
14108  // them at element indices required for the instruction.
14109  for (unsigned i = 0; i < N->getNumOperands(); i++) {
14110  if (DAG.getDataLayout().isLittleEndian())
14111  ShuffleMask[CorrectElems & 0xF] = Elems & 0xF;
14112  else
14113  ShuffleMask[(CorrectElems & 0xF0) >> 4] = (Elems & 0xF0) >> 4;
14114  CorrectElems = CorrectElems >> 8;
14115  Elems = Elems >> 8;
14116  }
14117 
14118  SDValue Shuffle =
14119  DAG.getVectorShuffle(Input.getValueType(), dl, Input,
14120  DAG.getUNDEF(Input.getValueType()), ShuffleMask);
14121 
14122  EVT VT = N->getValueType(0);
14123  SDValue Conv = DAG.getBitcast(VT, Shuffle);
14124 
14125  EVT ExtVT = EVT::getVectorVT(*DAG.getContext(),
14127  VT.getVectorNumElements());
14128  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, VT, Conv,
14129  DAG.getValueType(ExtVT));
14130 }
14131 
14132 // Look for build vector patterns where input operands come from sign
14133 // extended vector_extract elements of specific indices. If the correct indices
14134 // aren't used, add a vector shuffle to fix up the indices and create
14135 // SIGN_EXTEND_INREG node which selects the vector sign extend instructions
14136 // during instruction selection.
14138  // This array encodes the indices that the vector sign extend instructions
14139  // extract from when extending from one type to another for both BE and LE.
14140  // The right nibble of each byte corresponds to the LE incides.
14141  // and the left nibble of each byte corresponds to the BE incides.
14142  // For example: 0x3074B8FC byte->word
14143  // For LE: the allowed indices are: 0x0,0x4,0x8,0xC
14144  // For BE: the allowed indices are: 0x3,0x7,0xB,0xF
14145  // For example: 0x000070F8 byte->double word
14146  // For LE: the allowed indices are: 0x0,0x8
14147  // For BE: the allowed indices are: 0x7,0xF
14148  uint64_t TargetElems[] = {
14149  0x3074B8FC, // b->w
14150  0x000070F8, // b->d
14151  0x10325476, // h->w
14152  0x00003074, // h->d
14153  0x00001032, // w->d
14154  };
14155 
14156  uint64_t Elems = 0;
14157  int Index;
14158  SDValue Input;
14159 
14160  auto isSExtOfVecExtract = [&](SDValue Op) -> bool {
14161  if (!Op)
14162  return false;
14163  if (Op.getOpcode() != ISD::SIGN_EXTEND &&
14164  Op.getOpcode() != ISD::SIGN_EXTEND_INREG)
14165  return false;
14166 
14167  // A SIGN_EXTEND_INREG might be fed by an ANY_EXTEND to produce a value
14168  // of the right width.
14169  SDValue Extract = Op.getOperand(0);
14170  if (Extract.getOpcode() == ISD::ANY_EXTEND)
14171  Extract = Extract.getOperand(0);
14172  if (Extract.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
14173  return false;
14174 
14175  ConstantSDNode *ExtOp = dyn_cast<ConstantSDNode>(Extract.getOperand(1));
14176  if (!ExtOp)
14177  return false;
14178 
14179  Index = ExtOp->getZExtValue();
14180  if (Input && Input != Extract.getOperand(0))
14181  return false;
14182 
14183  if (!Input)
14184  Input = Extract.getOperand(0);
14185 
14186  Elems = Elems << 8;
14187  Index = DAG.getDataLayout().isLittleEndian() ? Index : Index << 4;
14188  Elems |= Index;
14189 
14190  return true;
14191  };
14192 
14193  // If the build vector operands aren't sign extended vector extracts,
14194  // of the same input vector, then return.
14195  for (unsigned i = 0; i < N->getNumOperands(); i++) {
14196  if (!isSExtOfVecExtract(N->getOperand(i))) {
14197  return SDValue();
14198  }
14199  }
14200 
14201  // If the vector extract indicies are not correct, add the appropriate
14202  // vector_shuffle.
14203  int TgtElemArrayIdx;
14204  int InputSize = Input.getValueType().getScalarSizeInBits();
14205  int OutputSize = N->getValueType(0).getScalarSizeInBits();
14206  if (InputSize + OutputSize == 40)
14207  TgtElemArrayIdx = 0;
14208  else if (InputSize + OutputSize == 72)
14209  TgtElemArrayIdx = 1;
14210  else if (InputSize + OutputSize == 48)
14211  TgtElemArrayIdx = 2;
14212  else if (InputSize + OutputSize == 80)
14213  TgtElemArrayIdx = 3;
14214  else if (InputSize + OutputSize == 96)
14215  TgtElemArrayIdx = 4;
14216  else
14217  return SDValue();
14218 
14219  uint64_t CorrectElems = TargetElems[TgtElemArrayIdx];
14220  CorrectElems = DAG.getDataLayout().isLittleEndian()
14221  ? CorrectElems & 0x0F0F0F0F0F0F0F0F
14222  : CorrectElems & 0xF0F0F0F0F0F0F0F0;
14223  if (Elems != CorrectElems) {
14224  return addShuffleForVecExtend(N, DAG, Input, Elems, CorrectElems);
14225  }
14226 
14227  // Regular lowering will catch cases where a shuffle is not needed.
14228  return SDValue();
14229 }
14230 
14231 // Look for the pattern of a load from a narrow width to i128, feeding
14232 // into a BUILD_VECTOR of v1i128. Replace this sequence with a PPCISD node
14233 // (LXVRZX). This node represents a zero extending load that will be matched
14234 // to the Load VSX Vector Rightmost instructions.
14236  SDLoc DL(N);
14237 
14238  // This combine is only eligible for a BUILD_VECTOR of v1i128.
14239  if (N->getValueType(0) != MVT::v1i128)
14240  return SDValue();
14241 
14242  SDValue Operand = N->getOperand(0);
14243  // Proceed with the transformation if the operand to the BUILD_VECTOR
14244  // is a load instruction.
14245  if (Operand.getOpcode() != ISD::LOAD)
14246  return SDValue();
14247 
14248  auto *LD = cast<LoadSDNode>(Operand);
14249  EVT MemoryType = LD->getMemoryVT();
14250 
14251  // This transformation is only valid if the we are loading either a byte,
14252  // halfword, word, or doubleword.
14253  bool ValidLDType = MemoryType == MVT::i8 || MemoryType == MVT::i16 ||
14255 
14256  // Ensure that the load from the narrow width is being zero extended to i128.
14257  if (!ValidLDType ||
14258  (LD->getExtensionType() != ISD::ZEXTLOAD &&
14259  LD->getExtensionType() != ISD::EXTLOAD))
14260  return SDValue();
14261 
14262  SDValue LoadOps[] = {
14263  LD->getChain(), LD->getBasePtr(),
14264  DAG.getIntPtrConstant(MemoryType.getScalarSizeInBits(), DL)};
14265 
14268  LoadOps, MemoryType, LD->getMemOperand());
14269 }
14270 
14271 SDValue PPCTargetLowering::DAGCombineBuildVector(SDNode *N,
14272  DAGCombinerInfo &DCI) const {
14273  assert(N->getOpcode() == ISD::BUILD_VECTOR &&
14274  "Should be called with a BUILD_VECTOR node");
14275 
14276  SelectionDAG &DAG = DCI.DAG;
14277  SDLoc dl(N);
14278 
14279  if (!Subtarget.hasVSX())
14280  return SDValue();
14281 
14282  // The target independent DAG combiner will leave a build_vector of
14283  // float-to-int conversions intact. We can generate MUCH better code for
14284  // a float-to-int conversion of a vector of floats.
14285  SDValue FirstInput = N->getOperand(0);
14286  if (FirstInput.getOpcode() == PPCISD::MFVSR) {
14287  SDValue Reduced = combineElementTruncationToVectorTruncation(N, DCI);
14288  if (Reduced)
14289  return Reduced;
14290  }
14291 
14292  // If we're building a vector out of consecutive loads, just load that
14293  // vector type.
14294  SDValue Reduced = combineBVOfConsecutiveLoads(N, DAG);
14295  if (Reduced)
14296  return Reduced;
14297 
14298  // If we're building a vector out of extended elements from another vector
14299  // we have P9 vector integer extend instructions. The code assumes legal
14300  // input types (i.e. it can't handle things like v4i16) so do not run before
14301  // legalization.
14302  if (Subtarget.hasP9Altivec() && !DCI.isBeforeLegalize()) {
14303  Reduced = combineBVOfVecSExt(N, DAG);
14304  if (Reduced)
14305  return Reduced;
14306  }
14307 
14308  // On Power10, the Load VSX Vector Rightmost instructions can be utilized
14309  // if this is a BUILD_VECTOR of v1i128, and if the operand to the BUILD_VECTOR
14310  // is a load from <valid narrow width> to i128.
14311  if (Subtarget.isISA3_1()) {
14312  SDValue BVOfZLoad = combineBVZEXTLOAD(N, DAG);
14313  if (BVOfZLoad)
14314  return BVOfZLoad;
14315  }
14316 
14317  if (N->getValueType(0) != MVT::v2f64)
14318  return SDValue();
14319 
14320  // Looking for:
14321  // (build_vector ([su]int_to_fp (extractelt 0)), [su]int_to_fp (extractelt 1))
14322  if (FirstInput.getOpcode() != ISD::SINT_TO_FP &&
14323  FirstInput.getOpcode() != ISD::UINT_TO_FP)
14324  return SDValue();
14325  if (N->getOperand(1).getOpcode() != ISD::SINT_TO_FP &&
14326  N->getOperand(1).getOpcode() != ISD::UINT_TO_FP)
14327  return SDValue();
14328  if (FirstInput.getOpcode() != N->getOperand(1).getOpcode())
14329  return SDValue();
14330 
14331  SDValue Ext1 = FirstInput.getOperand(0);
14332  SDValue Ext2 = N->getOperand(1).getOperand(0);
14333  if(Ext1.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
14335  return SDValue();
14336 
14337  ConstantSDNode *Ext1Op = dyn_cast<ConstantSDNode>(Ext1.getOperand(1));
14338  ConstantSDNode *Ext2Op = dyn_cast<ConstantSDNode>(Ext2.getOperand(1));
14339  if (!Ext1Op || !Ext2Op)
14340  return SDValue();
14341  if (Ext1.getOperand(0).getValueType() != MVT::v4i32 ||
14342  Ext1.getOperand(0) != Ext2.getOperand(0))
14343  return SDValue();
14344 
14345  int FirstElem = Ext1Op->getZExtValue();
14346  int SecondElem = Ext2Op->getZExtValue();
14347  int SubvecIdx;
14348  if (FirstElem == 0 && SecondElem == 1)
14349  SubvecIdx = Subtarget.isLittleEndian() ? 1 : 0;
14350  else if (FirstElem == 2 && SecondElem == 3)
14351  SubvecIdx = Subtarget.isLittleEndian() ? 0 : 1;
14352  else
14353  return SDValue();
14354 
14355  SDValue SrcVec = Ext1.getOperand(0);
14356  auto NodeType = (N->getOperand(1).getOpcode() == ISD::SINT_TO_FP) ?
14358  return DAG.getNode(NodeType, dl, MVT::v2f64,
14359  SrcVec, DAG.getIntPtrConstant(SubvecIdx, dl));
14360 }
14361 
14362 SDValue PPCTargetLowering::combineFPToIntToFP(SDNode *N,
14363  DAGCombinerInfo &DCI) const {
14364  assert((N->getOpcode() == ISD::SINT_TO_FP ||
14365  N->getOpcode() == ISD::UINT_TO_FP) &&
14366  "Need an int -> FP conversion node here");
14367 
14368  if (useSoftFloat() || !Subtarget.has64BitSupport())
14369  return SDValue();
14370 
14371  SelectionDAG &DAG = DCI.DAG;
14372  SDLoc dl(N);
14373  SDValue Op(N, 0);
14374 
14375  // Don't handle ppc_fp128 here or conversions that are out-of-range capable
14376  // from the hardware.
14377  if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64)
14378  return SDValue();
14379  if (!Op.getOperand(0).getValueType().isSimple())
14380  return SDValue();
14381  if (Op.getOperand(0).getValueType().getSimpleVT() <= MVT(MVT::i1) ||
14382  Op.getOperand(0).getValueType().getSimpleVT() > MVT(MVT::i64))
14383  return SDValue();
14384 
14385  SDValue FirstOperand(Op.getOperand(0));
14386  bool SubWordLoad = FirstOperand.getOpcode() == ISD::LOAD &&
14387  (FirstOperand.getValueType() == MVT::i8 ||
14388  FirstOperand.getValueType() == MVT::i16);
14389  if (Subtarget.hasP9Vector() && Subtarget.hasP9Altivec() && SubWordLoad) {
14390  bool Signed = N->getOpcode() == ISD::SINT_TO_FP;
14391  bool DstDouble = Op.getValueType() == MVT::f64;
14392  unsigned ConvOp = Signed ?
14393  (DstDouble ? PPCISD::FCFID : PPCISD::FCFIDS) :
14394  (DstDouble ? PPCISD::FCFIDU : PPCISD::FCFIDUS);
14395  SDValue WidthConst =
14396  DAG.getIntPtrConstant(FirstOperand.getValueType() == MVT::i8 ? 1 : 2,
14397  dl, false);
14398  LoadSDNode *LDN = cast<LoadSDNode>(FirstOperand.getNode());
14399  SDValue Ops[] = { LDN->getChain(), LDN->getBasePtr(), WidthConst };
14402  Ops, MVT::i8, LDN->getMemOperand());
14403 
14404  // For signed conversion, we need to sign-extend the value in the VSR
14405  if (Signed) {
14406  SDValue ExtOps[] = { Ld, WidthConst };
14407  SDValue Ext = DAG.getNode(PPCISD::VEXTS, dl, MVT::f64, ExtOps);
14408  return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ext);
14409  } else
14410  return DAG.getNode(ConvOp, dl, DstDouble ? MVT::f64 : MVT::f32, Ld);
14411  }
14412 
14413 
14414  // For i32 intermediate values, unfortunately, the conversion functions
14415  // leave the upper 32 bits of the value are undefined. Within the set of
14416  // scalar instructions, we have no method for zero- or sign-extending the
14417  // value. Thus, we cannot handle i32 intermediate values here.
14418  if (Op.getOperand(0).getValueType() == MVT::i32)
14419  return SDValue();
14420 
14421  assert((Op.getOpcode() == ISD::SINT_TO_FP || Subtarget.hasFPCVT()) &&
14422  "UINT_TO_FP is supported only with FPCVT");
14423 
14424  // If we have FCFIDS, then use it when converting to single-precision.
14425  // Otherwise, convert to double-precision and then round.
14426  unsigned FCFOp = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14427  ? (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDUS
14428  : PPCISD::FCFIDS)
14429  : (Op.getOpcode() == ISD::UINT_TO_FP ? PPCISD::FCFIDU
14430  : PPCISD::FCFID);
14431  MVT FCFTy = (Subtarget.hasFPCVT() && Op.getValueType() == MVT::f32)
14432  ? MVT::f32
14433  : MVT::f64;
14434 
14435  // If we're converting from a float, to an int, and back to a float again,
14436  // then we don't need the store/load pair at all.
14437  if ((Op.getOperand(0).getOpcode() == ISD::FP_TO_UINT &&
14438  Subtarget.hasFPCVT()) ||
14439  (Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT)) {
14440  SDValue Src = Op.getOperand(0).getOperand(0);
14441  if (Src.getValueType() == MVT::f32) {
14442  Src = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Src);
14443  DCI.AddToWorklist(Src.getNode());
14444  } else if (Src.getValueType() != MVT::f64) {
14445  // Make sure that we don't pick up a ppc_fp128 source value.
14446  return SDValue();
14447  }
14448 
14449  unsigned FCTOp =
14450  Op.getOperand(0).getOpcode() == ISD::FP_TO_SINT ? PPCISD::FCTIDZ :
14452 
14453  SDValue Tmp = DAG.getNode(FCTOp, dl, MVT::f64, Src);
14454  SDValue FP = DAG.getNode(FCFOp, dl, FCFTy, Tmp);
14455 
14456  if (Op.getValueType() == MVT::f32 && !Subtarget.hasFPCVT()) {
14457  FP = DAG.getNode(ISD::FP_ROUND, dl,
14458  MVT::f32, FP, DAG.getIntPtrConstant(0, dl));
14459  DCI.AddToWorklist(FP.getNode());
14460  }
14461 
14462  return FP;
14463  }
14464 
14465  return SDValue();
14466 }
14467 
14468 // expandVSXLoadForLE - Convert VSX loads (which may be intrinsics for
14469 // builtins) into loads with swaps.
14471  DAGCombinerInfo &DCI) const {
14472  SelectionDAG &DAG = DCI.DAG;
14473  SDLoc dl(N);
14474  SDValue Chain;
14475  SDValue Base;
14476  MachineMemOperand *MMO;
14477 
14478  switch (N->getOpcode()) {
14479  default:
14480  llvm_unreachable("Unexpected opcode for little endian VSX load");
14481  case ISD::LOAD: {
14482  LoadSDNode *LD = cast<LoadSDNode>(N);
14483  Chain = LD->getChain();
14484  Base = LD->getBasePtr();
14485  MMO = LD->getMemOperand();
14486  // If the MMO suggests this isn't a load of a full vector, leave
14487  // things alone. For a built-in, we have to make the change for
14488  // correctness, so if there is a size problem that will be a bug.
14489  if (MMO->getSize() < 16)
14490  return SDValue();
14491  break;
14492  }
14493  case ISD::INTRINSIC_W_CHAIN: {
14494  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14495  Chain = Intrin->getChain();
14496  // Similarly to the store case below, Intrin->getBasePtr() doesn't get
14497  // us what we want. Get operand 2 instead.
14498  Base = Intrin->getOperand(2);
14499  MMO = Intrin->getMemOperand();
14500  break;
14501  }
14502  }
14503 
14504  MVT VecTy = N->getValueType(0).getSimpleVT();
14505 
14506  // Do not expand to PPCISD::LXVD2X + PPCISD::XXSWAPD when the load is
14507  // aligned and the type is a vector with elements up to 4 bytes
14508  if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14509  VecTy.getScalarSizeInBits() <= 32) {
14510  return SDValue();
14511  }
14512 
14513  SDValue LoadOps[] = { Chain, Base };
14516  LoadOps, MVT::v2f64, MMO);
14517 
14518  DCI.AddToWorklist(Load.getNode());
14519  Chain = Load.getValue(1);
14520  SDValue Swap = DAG.getNode(
14521  PPCISD::XXSWAPD, dl, DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Load);
14522  DCI.AddToWorklist(Swap.getNode());
14523 
14524  // Add a bitcast if the resulting load type doesn't match v2f64.
14525  if (VecTy != MVT::v2f64) {
14526  SDValue N = DAG.getNode(ISD::BITCAST, dl, VecTy, Swap);
14527  DCI.AddToWorklist(N.getNode());
14528  // Package {bitcast value, swap's chain} to match Load's shape.
14529  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(VecTy, MVT::Other),
14530  N, Swap.getValue(1));
14531  }
14532 
14533  return Swap;
14534 }
14535 
14536 // expandVSXStoreForLE - Convert VSX stores (which may be intrinsics for
14537 // builtins) into stores with swaps.
14539  DAGCombinerInfo &DCI) const {
14540  SelectionDAG &DAG = DCI.DAG;
14541  SDLoc dl(N);
14542  SDValue Chain;
14543  SDValue Base;
14544  unsigned SrcOpnd;
14545  MachineMemOperand *MMO;
14546 
14547  switch (N->getOpcode()) {
14548  default:
14549  llvm_unreachable("Unexpected opcode for little endian VSX store");
14550  case ISD::STORE: {
14551  StoreSDNode *ST = cast<StoreSDNode>(N);
14552  Chain = ST->getChain();
14553  Base = ST->getBasePtr();
14554  MMO = ST->getMemOperand();
14555  SrcOpnd = 1;
14556  // If the MMO suggests this isn't a store of a full vector, leave
14557  // things alone. For a built-in, we have to make the change for
14558  // correctness, so if there is a size problem that will be a bug.
14559  if (MMO->getSize() < 16)
14560  return SDValue();
14561  break;
14562  }
14563  case ISD::INTRINSIC_VOID: {
14564  MemIntrinsicSDNode *Intrin = cast<MemIntrinsicSDNode>(N);
14565  Chain = Intrin->getChain();
14566  // Intrin->getBasePtr() oddly does not get what we want.
14567  Base = Intrin->getOperand(3);
14568  MMO = Intrin->getMemOperand();
14569  SrcOpnd = 2;
14570  break;
14571  }
14572  }
14573 
14574  SDValue Src = N->getOperand(SrcOpnd);
14575  MVT VecTy = Src.getValueType().getSimpleVT();
14576 
14577  // Do not expand to PPCISD::XXSWAPD and PPCISD::STXVD2X when the load is
14578  // aligned and the type is a vector with elements up to 4 bytes
14579  if (Subtarget.needsSwapsForVSXMemOps() && MMO->getAlign() >= Align(16) &&
14580  VecTy.getScalarSizeInBits() <= 32) {
14581  return SDValue();
14582  }
14583 
14584  // All stores are done as v2f64 and possible bit cast.
14585  if (VecTy != MVT::v2f64) {
14586  Src = DAG.getNode(ISD::BITCAST, dl, MVT::v2f64, Src);
14587  DCI.AddToWorklist(Src.getNode());
14588  }
14589 
14590  SDValue Swap = DAG.getNode(PPCISD::XXSWAPD, dl,
14591  DAG.getVTList(MVT::v2f64, MVT::Other), Chain, Src);
14592  DCI.AddToWorklist(Swap.getNode());
14593  Chain = Swap.getValue(1);
14594  SDValue StoreOps[] = { Chain, Swap, Base };
14596  DAG.getVTList(MVT::Other),
14597  StoreOps, VecTy, MMO);
14598  DCI.AddToWorklist(Store.getNode());
14599  return Store;
14600 }
14601 
14602 // Handle DAG combine for STORE (FP_TO_INT F).
14603 SDValue PPCTargetLowering::combineStoreFPToInt(SDNode *N,
14604  DAGCombinerInfo &DCI) const {
14605 
14606  SelectionDAG &DAG = DCI.DAG;
14607  SDLoc dl(N);
14608  unsigned Opcode = N->getOperand(1).getOpcode();
14609 
14610  assert((Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT)
14611  && "Not a FP_TO_INT Instruction!");
14612 
14613  SDValue Val = N->getOperand(1).getOperand(0);
14614  EVT Op1VT = N->getOperand(1).getValueType();
14615  EVT ResVT = Val.getValueType();
14616 
14617  if (!isTypeLegal(ResVT))
14618  return SDValue();
14619 
14620  // Only perform combine for conversion to i64/i32 or power9 i16/i8.
14621  bool ValidTypeForStoreFltAsInt =
14622  (Op1VT == MVT::i32 || Op1VT == MVT::i64 ||
14623  (Subtarget.hasP9Vector() && (Op1VT == MVT::i16 || Op1VT == MVT::i8)));
14624 
14625  if (ResVT == MVT::f128 && !Subtarget.hasP9Vector())
14626  return SDValue();
14627 
14628  if (ResVT == MVT::ppcf128 || !Subtarget.hasP8Vector() ||
14629  cast<StoreSDNode>(N)->isTruncatingStore() || !ValidTypeForStoreFltAsInt)
14630  return SDValue();
14631 
14632  // Extend f32 values to f64
14633  if (ResVT.getScalarSizeInBits() == 32) {
14634  Val = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f64, Val);
14635  DCI.AddToWorklist(Val.getNode());
14636  }
14637 
14638  // Set signed or unsigned conversion opcode.
14639  unsigned ConvOpcode = (Opcode == ISD::FP_TO_SINT) ?
14642 
14643  Val = DAG.getNode(ConvOpcode,
14644  dl, ResVT == MVT::f128 ? MVT::f128 : MVT::f64, Val);
14645  DCI.AddToWorklist(Val.getNode());
14646 
14647  // Set number of bytes being converted.
14648  unsigned ByteSize = Op1VT.getScalarSizeInBits() / 8;
14649  SDValue Ops[] = { N->getOperand(0), Val, N->getOperand(2),
14650  DAG.getIntPtrConstant(ByteSize, dl, false),
14651  DAG.getValueType(Op1VT) };
14652 
14654  DAG.getVTList(MVT::Other), Ops,
14655  cast<StoreSDNode>(N)->getMemoryVT(),
14656  cast<StoreSDNode>(N)->getMemOperand());
14657 
14658  DCI.AddToWorklist(Val.getNode());
14659  return Val;
14660 }
14661 
14662 static bool isAlternatingShuffMask(const ArrayRef<int> &Mask, int NumElts) {
14663  // Check that the source of the element keeps flipping
14664  // (i.e. Mask[i] < NumElts -> Mask[i+i] >= NumElts).
14665  bool PrevElemFromFirstVec = Mask[0] < NumElts;
14666  for (int i = 1, e = Mask.size(); i < e; i++) {
14667  if (PrevElemFromFirstVec && Mask[i] < NumElts)
14668  return false;
14669  if (!PrevElemFromFirstVec && Mask[i] >= NumElts)
14670  return false;
14671  PrevElemFromFirstVec = !PrevElemFromFirstVec;
14672  }
14673  return true;
14674 }
14675 
14676 static bool isSplatBV(SDValue Op) {
14677  if (Op.getOpcode() != ISD::BUILD_VECTOR)
14678  return false;
14679  SDValue FirstOp;
14680 
14681  // Find first non-undef input.
14682  for (int i = 0, e = Op.getNumOperands(); i < e; i++) {
14683  FirstOp = Op.getOperand(i);
14684  if (!FirstOp.isUndef())
14685  break;
14686  }
14687 
14688  // All inputs are undef or the same as the first non-undef input.
14689  for (int i = 1, e = Op.getNumOperands(); i < e; i++)
14690  if (Op.getOperand(i) != FirstOp && !Op.getOperand(i).isUndef())
14691  return false;
14692  return true;
14693 }
14694 
14696  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14697  return Op;
14698  if (Op.getOpcode() != ISD::BITCAST)
14699  return SDValue();
14700  Op = Op.getOperand(0);
14701  if (Op.getOpcode() == ISD::SCALAR_TO_VECTOR)
14702  return Op;
14703  return SDValue();
14704 }
14705 
14706 // Fix up the shuffle mask to account for the fact that the result of
14707 // scalar_to_vector is not in lane zero. This just takes all values in
14708 // the ranges specified by the min/max indices and adds the number of
14709 // elements required to ensure each element comes from the respective
14710 // position in the valid lane.
14711 // On little endian, that's just the corresponding element in the other
14712 // half of the vector. On big endian, it is in the same half but right
14713 // justified rather than left justified in that half.
14715  int LHSMaxIdx, int RHSMinIdx,
14716  int RHSMaxIdx, int HalfVec,
14717  unsigned ValidLaneWidth,
14718  const PPCSubtarget &Subtarget) {
14719  for (int i = 0, e = ShuffV.size(); i < e; i++) {
14720  int Idx = ShuffV[i];
14721  if ((Idx >= 0 && Idx < LHSMaxIdx) || (Idx >= RHSMinIdx && Idx < RHSMaxIdx))
14722  ShuffV[i] +=
14723  Subtarget.isLittleEndian() ? HalfVec : HalfVec - ValidLaneWidth;
14724  }
14725 }
14726 
14727 // Replace a SCALAR_TO_VECTOR with a SCALAR_TO_VECTOR_PERMUTED except if
14728 // the original is:
14729 // (<n x Ty> (scalar_to_vector (Ty (extract_elt <n x Ty> %a, C))))
14730 // In such a case, just change the shuffle mask to extract the element
14731 // from the permuted index.
14733  const PPCSubtarget &Subtarget) {
14734  SDLoc dl(OrigSToV);
14735  EVT VT = OrigSToV.getValueType();
14736  assert(OrigSToV.getOpcode() == ISD::SCALAR_TO_VECTOR &&
14737  "Expecting a SCALAR_TO_VECTOR here");
14738  SDValue Input = OrigSToV.getOperand(0);
14739 
14740  if (Input.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
14741  ConstantSDNode *Idx = dyn_cast<ConstantSDNode>(Input.getOperand(1));
14742  SDValue OrigVector = Input.getOperand(0);
14743 
14744  // Can't handle non-const element indices or different vector types
14745  // for the input to the extract and the output of the scalar_to_vector.
14746  if (Idx && VT == OrigVector.getValueType()) {
14747  unsigned NumElts = VT.getVectorNumElements();
14748  assert(
14749  NumElts > 1 &&
14750  "Cannot produce a permuted scalar_to_vector for one element vector");
14751  SmallVector<int, 16> NewMask(NumElts, -1);
14752  unsigned ResultInElt = NumElts / 2;
14753  ResultInElt -= Subtarget.isLittleEndian() ? 0 : 1;
14754  NewMask[ResultInElt] = Idx->getZExtValue();
14755  return DAG.getVectorShuffle(VT, dl, OrigVector, OrigVector, NewMask);
14756  }
14757  }
14758  return DAG.getNode(PPCISD::SCALAR_TO_VECTOR_PERMUTED, dl, VT,
14759  OrigSToV.getOperand(0));
14760 }
14761 
14762 // On little endian subtargets, combine shuffles such as:
14763 // vector_shuffle<16,1,17,3,18,5,19,7,20,9,21,11,22,13,23,15>, <zero>, %b
14764 // into:
14765 // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7>, <zero>, %b
14766 // because the latter can be matched to a single instruction merge.
14767 // Furthermore, SCALAR_TO_VECTOR on little endian always involves a permute
14768 // to put the value into element zero. Adjust the shuffle mask so that the
14769 // vector can remain in permuted form (to prevent a swap prior to a shuffle).
14770 // On big endian targets, this is still useful for SCALAR_TO_VECTOR
14771 // nodes with elements smaller than doubleword because all the ways
14772 // of getting scalar data into a vector register put the value in the
14773 // rightmost element of the left half of the vector.
SDValue PPCTargetLowering::combineVectorShuffle(ShuffleVectorSDNode *SVN,
                                                SelectionDAG &DAG) const {
  SDValue LHS = SVN->getOperand(0);
  SDValue RHS = SVN->getOperand(1);
  auto Mask = SVN->getMask();
  int NumElts = LHS.getValueType().getVectorNumElements();
  // Default result: the shuffle itself, returned unchanged whenever no
  // combine applies.
  SDValue Res(SVN, 0);
  SDLoc dl(SVN);
  bool IsLittleEndian = Subtarget.isLittleEndian();

  // On big endian targets this is only useful for subtargets with direct moves.
  // On little endian targets it would be useful for all subtargets with VSX.
  // However adding special handling for LE subtargets without direct moves
  // would be wasted effort since the minimum arch for LE is ISA 2.07 (Power8)
  // which includes direct moves.
  if (!Subtarget.hasDirectMove())
    return Res;

  // If this is not a shuffle of a shuffle and the first element comes from
  // the second vector, canonicalize to the commuted form. This will make it
  // more likely to match one of the single instruction patterns.
  if (Mask[0] >= NumElts && LHS.getOpcode() != ISD::VECTOR_SHUFFLE &&
      RHS.getOpcode() != ISD::VECTOR_SHUFFLE) {
    std::swap(LHS, RHS);
    Res = DAG.getCommutedVectorShuffle(*SVN);
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }

  // Adjust the shuffle mask if either input vector comes from a
  // SCALAR_TO_VECTOR and keep the respective input vector in permuted
  // form (to prevent the need for a swap).
  // ShuffV is a mutable copy of the mask that the fixups below modify.
  SmallVector<int, 16> ShuffV(Mask.begin(), Mask.end());
  SDValue SToVLHS = isScalarToVec(LHS);
  SDValue SToVRHS = isScalarToVec(RHS);
  if (SToVLHS || SToVRHS) {
    int NumEltsIn = SToVLHS ? SToVLHS.getValueType().getVectorNumElements()
                            : SToVRHS.getValueType().getVectorNumElements();
    int NumEltsOut = ShuffV.size();
    // The width of the "valid lane" (i.e. the lane that contains the value that
    // is vectorized) needs to be expressed in terms of the number of elements
    // of the shuffle. It is thereby the ratio of the values before and after
    // any bitcast.
    unsigned ValidLaneWidth =
        SToVLHS ? SToVLHS.getValueType().getScalarSizeInBits() /
                      LHS.getValueType().getScalarSizeInBits()
                : SToVRHS.getValueType().getScalarSizeInBits() /
                      RHS.getValueType().getScalarSizeInBits();

    // Initially assume that neither input is permuted. These will be adjusted
    // accordingly if either input is. A max index of -1 leaves the LHS range
    // empty; a min/max of -1 leaves the RHS range empty.
    int LHSMaxIdx = -1;
    int RHSMinIdx = -1;
    int RHSMaxIdx = -1;
    int HalfVec = LHS.getValueType().getVectorNumElements() / 2;

    // Get the permuted scalar to vector nodes for the source(s) that come from
    // ISD::SCALAR_TO_VECTOR.
    // On big endian systems, this only makes sense for element sizes smaller
    // than 64 bits since for 64-bit elements, all instructions already put
    // the value into element zero. Since scalar size of LHS and RHS may differ
    // after isScalarToVec, this should be checked using their own sizes.
    if (SToVLHS) {
      if (!IsLittleEndian && SToVLHS.getValueType().getScalarSizeInBits() >= 64)
        return Res;
      // Set up the values for the shuffle vector fixup.
      LHSMaxIdx = NumEltsOut / NumEltsIn;
      SToVLHS = getSToVPermuted(SToVLHS, DAG, Subtarget);
      if (SToVLHS.getValueType() != LHS.getValueType())
        SToVLHS = DAG.getBitcast(LHS.getValueType(), SToVLHS);
      LHS = SToVLHS;
    }
    if (SToVRHS) {
      if (!IsLittleEndian && SToVRHS.getValueType().getScalarSizeInBits() >= 64)
        return Res;
      RHSMinIdx = NumEltsOut;
      RHSMaxIdx = NumEltsOut / NumEltsIn + RHSMinIdx;
      SToVRHS = getSToVPermuted(SToVRHS, DAG, Subtarget);
      if (SToVRHS.getValueType() != RHS.getValueType())
        SToVRHS = DAG.getBitcast(RHS.getValueType(), SToVRHS);
      RHS = SToVRHS;
    }

    // Fix up the shuffle mask to reflect where the desired element actually is.
    // The minimum and maximum indices that correspond to element zero for both
    // the LHS and RHS are computed and will control which shuffle mask entries
    // are to be changed. For example, if the RHS is permuted, any shuffle mask
    // entries in the range [RHSMinIdx,RHSMaxIdx) will be adjusted.
    fixupShuffleMaskForPermutedSToV(ShuffV, LHSMaxIdx, RHSMinIdx, RHSMaxIdx,
                                    HalfVec, ValidLaneWidth, Subtarget);
    Res = DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);

    // We may have simplified away the shuffle. We won't be able to do anything
    // further with it here.
    if (!isa<ShuffleVectorSDNode>(Res))
      return Res;
    Mask = cast<ShuffleVectorSDNode>(Res)->getMask();
  }

  // The splat operand (if any) sits on the RHS on LE and the LHS on BE.
  SDValue TheSplat = IsLittleEndian ? RHS : LHS;
  // The common case after we commuted the shuffle is that the RHS is a splat
  // and we have elements coming in from the splat at indices that are not
  // conducive to using a merge.
  // Example:
  // vector_shuffle<0,17,1,19,2,21,3,23,4,25,5,27,6,29,7,31> t1, <zero>
  if (!isSplatBV(TheSplat))
    return Res;

  // We are looking for a mask such that all even elements are from
  // one vector and all odd elements from the other.
  if (!isAlternatingShuffMask(Mask, NumElts))
    return Res;

  // Adjust the mask so we are pulling in the same index from the splat
  // as the index from the interesting vector in consecutive elements.
  // Since all lanes of the splat are equivalent, redirecting a splat entry
  // to any index preserves the value while enabling a merge pattern.
  if (IsLittleEndian) {
    // Example (even elements from first vector):
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> t1, <zero>
    if (Mask[0] < NumElts)
      for (int i = 1, e = Mask.size(); i < e; i += 2)
        ShuffV[i] = (ShuffV[i - 1] + NumElts);
    // Example (odd elements from first vector):
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> t1, <zero>
    else
      for (int i = 0, e = Mask.size(); i < e; i += 2)
        ShuffV[i] = (ShuffV[i + 1] + NumElts);
  } else {
    // Example (even elements from first vector):
    // vector_shuffle<0,16,1,17,2,18,3,19,4,20,5,21,6,22,7,23> <zero>, t1
    if (Mask[0] < NumElts)
      for (int i = 0, e = Mask.size(); i < e; i += 2)
        ShuffV[i] = ShuffV[i + 1] - NumElts;
    // Example (odd elements from first vector):
    // vector_shuffle<16,0,17,1,18,2,19,3,20,4,21,5,22,6,23,7> <zero>, t1
    else
      for (int i = 1, e = Mask.size(); i < e; i += 2)
        ShuffV[i] = ShuffV[i - 1] - NumElts;
  }

  // If the RHS has undefs, we need to remove them since we may have created
  // a shuffle that adds those instead of the splat value.
  SDValue SplatVal =
      cast<BuildVectorSDNode>(TheSplat.getNode())->getSplatValue();
  TheSplat = DAG.getSplatBuildVector(TheSplat.getValueType(), dl, SplatVal);

  if (IsLittleEndian)
    RHS = TheSplat;
  else
    LHS = TheSplat;
  return DAG.getVectorShuffle(SVN->getValueType(0), dl, LHS, RHS, ShuffV);
}
14924 
14925 SDValue PPCTargetLowering::combineVReverseMemOP(ShuffleVectorSDNode *SVN,
14926  LSBaseSDNode *LSBase,
14927  DAGCombinerInfo &DCI) const {
14928  assert((ISD::isNormalLoad(LSBase) || ISD::isNormalStore(LSBase)) &&
14929  "Not a reverse memop pattern!");
14930 
14931  auto IsElementReverse = [](const ShuffleVectorSDNode *SVN) -> bool {
14932  auto Mask = SVN->getMask();
14933  int i = 0;
14934  auto I = Mask.rbegin();
14935  auto E = Mask.rend();
14936 
14937  for (; I != E; ++I) {
14938  if (*I != i)
14939  return false;
14940  i++;
14941  }
14942  return true;
14943  };
14944 
14945  SelectionDAG &DAG = DCI.DAG;
14946  EVT VT = SVN->getValueType(0);
14947 
14948  if (!isTypeLegal(VT) || !Subtarget.isLittleEndian() || !Subtarget.hasVSX())
14949  return SDValue();
14950 
14951  // Before P9, we have PPCVSXSwapRemoval pass to hack the element order.
14952  // See comment in PPCVSXSwapRemoval.cpp.
14953  // It is conflict with PPCVSXSwapRemoval opt. So we don't do it.
14954  if (!Subtarget.hasP9Vector())
14955  return SDValue();
14956 
14957  if(!IsElementReverse(SVN))
14958  return SDValue();
14959 
14960  if (LSBase->getOpcode() == ISD::LOAD) {
14961  // If the load return value 0 has more than one user except the
14962  // shufflevector instruction, it is not profitable to replace the
14963  // shufflevector with a reverse load.
14964  for (SDNode::use_iterator UI = LSBase->use_begin(), UE = LSBase->use_end();
14965  UI != UE; ++UI)
14966  if (UI.getUse().getResNo() == 0 && UI->getOpcode() != ISD::VECTOR_SHUFFLE)
14967  return SDValue();
14968 
14969  SDLoc dl(LSBase);
14970  SDValue LoadOps[] = {LSBase->getChain(), LSBase->getBasePtr()};
14971  return DAG.getMemIntrinsicNode(
14972  PPCISD::LOAD_VEC_BE, dl, DAG.getVTList(VT, MVT::Other), LoadOps,
14973  LSBase->getMemoryVT(), LSBase->getMemOperand());
14974  }
14975 
14976  if (LSBase->getOpcode() == ISD::STORE) {
14977  // If there are other uses of the shuffle, the swap cannot be avoided.
14978  // Forcing the use of an X-Form (since swapped stores only have
14979  // X-Forms) without removing the swap is unprofitable.
14980  if (!SVN->hasOneUse())
14981  return SDValue();
14982 
14983  SDLoc dl(LSBase);
14984  SDValue StoreOps[] = {LSBase->getChain(), SVN->getOperand(0),
14985  LSBase->getBasePtr()};
14986  return DAG.getMemIntrinsicNode(
14987  PPCISD::STORE_VEC_BE, dl, DAG.getVTList(MVT::Other), StoreOps,
14988  LSBase->getMemoryVT(), LSBase->getMemOperand());
14989  }
14990 
14991  llvm_unreachable("Expected a load or store node here");
14992 }
14993 
14995  DAGCombinerInfo &DCI) const {
14996  SelectionDAG &DAG = DCI.DAG;
14997  SDLoc dl(N);
14998  switch (N->getOpcode()) {
14999  default: break;
15000  case ISD::ADD:
15001  return combineADD(N, DCI);
15002  case ISD::SHL:
15003  return combineSHL(N, DCI);
15004  case ISD::SRA:
15005  return combineSRA(N, DCI);
15006  case ISD::SRL:
15007  return combineSRL(N, DCI);
15008  case ISD::MUL:
15009  return combineMUL(N, DCI);
15010  case ISD::FMA:
15011  case PPCISD::FNMSUB:
15012  return combineFMALike(N, DCI);
15013  case PPCISD::SHL:
15014  if (isNullConstant(N->getOperand(0))) // 0 << V -> 0.
15015  return N->getOperand(0);
15016  break;
15017  case PPCISD::SRL:
15018  if (isNullConstant(N->getOperand(0))) // 0 >>u V -> 0.
15019  return N->getOperand(0);
15020  break;
15021  case PPCISD::SRA:
15022  if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(0))) {
15023  if (C->isZero() || // 0 >>s V -> 0.
15024  C->isAllOnes()) // -1 >>s V -> -1.
15025  return N->getOperand(0);
15026  }
15027  break;
15028  case ISD::SIGN_EXTEND:
15029  case ISD::ZERO_EXTEND:
15030  case ISD::ANY_EXTEND:
15031  return DAGCombineExtBoolTrunc(N, DCI);
15032  case ISD::TRUNCATE:
15033  return combineTRUNCATE(N, DCI);
15034  case ISD::SETCC:
15035  if (SDValue CSCC = combineSetCC(N, DCI))
15036  return CSCC;
15038  case ISD::SELECT_CC:
15039  return DAGCombineTruncBoolExt(N, DCI);
15040  case ISD::SINT_TO_FP:
15041  case ISD::UINT_TO_FP:
15042  return combineFPToIntToFP(N, DCI);
15043  case ISD::VECTOR_SHUFFLE:
15044  if (ISD::isNormalLoad(N->getOperand(0).getNode())) {
15045  LSBaseSDNode* LSBase = cast<LSBaseSDNode>(N->getOperand(0));
15046  return combineVReverseMemOP(cast<ShuffleVectorSDNode>(N), LSBase, DCI);
15047  }
15048  return combineVectorShuffle(cast<ShuffleVectorSDNode>(N), DCI.DAG);
15049  case ISD::STORE: {
15050 
15051  EVT Op1VT = N->getOperand(1).getValueType();
15052  unsigned Opcode = N->getOperand(1).getOpcode();
15053 
15054  if (Opcode == ISD::FP_TO_SINT || Opcode == ISD::FP_TO_UINT) {
15055  SDValue Val= combineStoreFPToInt(N, DCI);
15056  if (Val)
15057  return Val;
15058  }
15059 
15060  if (Opcode == ISD::VECTOR_SHUFFLE && ISD::isNormalStore(N)) {
15061  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N->getOperand(1));
15062  SDValue Val= combineVReverseMemOP(SVN, cast<LSBaseSDNode>(N), DCI);
15063  if (Val)
15064  return Val;
15065  }
15066 
15067  // Turn STORE (BSWAP) -> sthbrx/stwbrx.
15068  if (cast<StoreSDNode>(N)->isUnindexed() && Opcode == ISD::BSWAP &&
15069  N->getOperand(1).getNode()->hasOneUse() &&
15070  (Op1VT == MVT::i32 || Op1VT == MVT::i16 ||
15071  (Subtarget.hasLDBRX() && Subtarget.isPPC64() && Op1VT == MVT::i64))) {
15072 
15073  // STBRX can only handle simple types and it makes no sense to store less
15074  // two bytes in byte-reversed order.
15075  EVT mVT = cast<StoreSDNode>(N)->getMemoryVT();
15076  if (mVT.isExtended() || mVT.getSizeInBits() < 16)
15077  break;
15078 
15079  SDValue BSwapOp = N->getOperand(1).getOperand(0);
15080  // Do an any-extend to 32-bits if this is a half-word input.
15081  if (BSwapOp.getValueType() == MVT::i16)
15082  BSwapOp = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, BSwapOp);
15083 
15084  // If the type of BSWAP operand is wider than stored memory width
15085  // it need to be shifted to the right side before STBRX.
15086  if (Op1VT.bitsGT(mVT)) {
15087  int Shift = Op1VT.getSizeInBits() - mVT.getSizeInBits();
15088  BSwapOp = DAG.getNode(ISD::SRL, dl, Op1VT, BSwapOp,
15089  DAG.getConstant(Shift, dl, MVT::i32));
15090  // Need to truncate if this is a bswap of i64 stored as i32/i16.
15091  if (Op1VT == MVT::i64)
15092  BSwapOp = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, BSwapOp);
15093  }
15094 
15095  SDValue Ops[] = {
15096  N->getOperand(0), BSwapOp, N->getOperand(2), DAG.getValueType(mVT)
15097  };
15098  return
15100  Ops, cast<StoreSDNode>(N)->getMemoryVT(),
15101  cast<StoreSDNode>(N)->getMemOperand());
15102  }
15103 
15104  // STORE Constant:i32<0> -> STORE<trunc to i32> Constant:i64<0>
15105  // So it can increase the chance of CSE constant construction.
15106  if (Subtarget.isPPC64() && !DCI.isBeforeLegalize() &&
15107  isa<ConstantSDNode>(N->getOperand(1)) && Op1VT == MVT::i32) {
15108  // Need to sign-extended to 64-bits to handle negative values.
15109  EVT MemVT = cast<StoreSDNode>(N)->getMemoryVT();
15110  uint64_t Val64 = SignExtend64(N->getConstantOperandVal(1),
15111  MemVT.getSizeInBits());
15112  SDValue Const64 = DAG.getConstant(Val64, dl, MVT::i64);
15113 
15114  // DAG.getTruncStore() can't be used here because it doesn't accept
15115  // the general (base + offset) addressing mode.
15116  // So we use UpdateNodeOperands and setTruncatingStore instead.
15117  DAG.UpdateNodeOperands(N, N->getOperand(0), Const64, N->getOperand(2),
15118  N->getOperand(3));
15119  cast<StoreSDNode>(N)->setTruncatingStore(true);
15120  return SDValue(N, 0);
15121  }
15122 
15123  // For little endian, VSX stores require generating xxswapd/lxvd2x.
15124  // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15125  if (Op1VT.isSimple()) {
15126  MVT StoreVT = Op1VT.getSimpleVT();
15127  if (Subtarget.needsSwapsForVSXMemOps() &&
15128  (StoreVT == MVT::v2f64 || StoreVT == MVT::v2i64 ||
15129  StoreVT == MVT::v4f32 || StoreVT == MVT::v4i32))
15130  return expandVSXStoreForLE(N, DCI);
15131  }
15132  break;
15133  }
15134  case ISD::LOAD: {
15135  LoadSDNode *LD = cast<LoadSDNode>(N);
15136  EVT VT = LD->getValueType(0);
15137 
15138  // For little endian, VSX loads require generating lxvd2x/xxswapd.
15139  // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15140  if (VT.isSimple()) {
15141  MVT LoadVT = VT.getSimpleVT();
15142  if (Subtarget.needsSwapsForVSXMemOps() &&
15143  (LoadVT == MVT::v2f64 || LoadVT == MVT::v2i64 ||
15144  LoadVT == MVT::v4f32 || LoadVT == MVT::v4i32))
15145  return expandVSXLoadForLE(N, DCI);
15146  }
15147 
15148  // We sometimes end up with a 64-bit integer load, from which we extract
15149  // two single-precision floating-point numbers. This happens with
15150  // std::complex<float>, and other similar structures, because of the way we
15151  // canonicalize structure copies. However, if we lack direct moves,
15152  // then the final bitcasts from the extracted integer values to the
15153  // floating-point numbers turn into store/load pairs. Even with direct moves,
15154  // just loading the two floating-point numbers is likely better.
15155  auto ReplaceTwoFloatLoad = [&]() {
15156  if (VT != MVT::i64)
15157  return false;
15158 
15159  if (LD->getExtensionType() != ISD::NON_EXTLOAD ||
15160  LD->isVolatile())
15161  return false;
15162 
15163  // We're looking for a sequence like this:
15164  // t13: i64,ch = load<LD8[%ref.tmp]> t0, t6, undef:i64
15165  // t16: i64 = srl t13, Constant:i32<32>
15166  // t17: i32 = truncate t16
15167  // t18: f32 = bitcast t17
15168  // t19: i32 = truncate t13
15169  // t20: f32 = bitcast t19
15170 
15171  if (!LD->hasNUsesOfValue(2, 0))
15172  return false;
15173 
15174  auto UI = LD->use_begin();
15175  while (UI.getUse().getResNo() != 0) ++UI;
15176  SDNode *Trunc = *UI++;
15177  while (UI.getUse().getResNo() != 0) ++UI;
15178  SDNode *RightShift = *UI;
15179  if (Trunc->getOpcode() != ISD::TRUNCATE)
15180  std::swap(Trunc, RightShift);
15181 
15182  if (Trunc->getOpcode() != ISD::TRUNCATE ||
15183  Trunc->getValueType(0) != MVT::i32 ||
15184  !Trunc->hasOneUse())
15185  return false;
15186  if (RightShift->getOpcode() != ISD::SRL ||
15187  !isa<ConstantSDNode>(RightShift->getOperand(1)) ||
15188  RightShift->getConstantOperandVal(1) != 32 ||
15189  !RightShift->hasOneUse())
15190  return false;
15191 
15192  SDNode *Trunc2 = *RightShift->use_begin();
15193  if (Trunc2->getOpcode() != ISD::TRUNCATE ||
15194  Trunc2->getValueType(0) != MVT::i32 ||
15195  !Trunc2->hasOneUse())
15196  return false;
15197 
15198  SDNode *Bitcast = *Trunc->use_begin();
15199  SDNode *Bitcast2 = *Trunc2->use_begin();
15200 
15201  if (Bitcast->getOpcode() != ISD::BITCAST ||
15202  Bitcast->getValueType(0) != MVT::f32)
15203  return false;
15204  if (Bitcast2->getOpcode() != ISD::BITCAST ||
15205  Bitcast2->getValueType(0) != MVT::f32)
15206  return false;
15207 
15208  if (Subtarget.isLittleEndian())
15209  std::swap(Bitcast, Bitcast2);
15210 
15211  // Bitcast has the second float (in memory-layout order) and Bitcast2
15212  // has the first one.
15213 
15214  SDValue BasePtr = LD->getBasePtr();
15215  if (LD->isIndexed()) {
15216  assert(LD->getAddressingMode() == ISD::PRE_INC &&
15217  "Non-pre-inc AM on PPC?");
15218  BasePtr =
15219  DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15220  LD->getOffset());
15221  }
15222 
15223  auto MMOFlags =
15224  LD->getMemOperand()->getFlags() & ~MachineMemOperand::MOVolatile;
15225  SDValue FloatLoad = DAG.getLoad(MVT::f32, dl, LD->getChain(), BasePtr,
15226  LD->getPointerInfo(), LD->getAlignment(),
15227  MMOFlags, LD->getAAInfo());
15228  SDValue AddPtr =
15229  DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(),
15230  BasePtr, DAG.getIntPtrConstant(4, dl));
15231  SDValue FloatLoad2 = DAG.getLoad(
15232  MVT::f32, dl, SDValue(FloatLoad.getNode(), 1), AddPtr,
15233  LD->getPointerInfo().getWithOffset(4),
15234  MinAlign(LD->getAlignment(), 4), MMOFlags, LD->getAAInfo());
15235 
15236  if (LD->isIndexed()) {
15237  // Note that DAGCombine should re-form any pre-increment load(s) from
15238  // what is produced here if that makes sense.
15239  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), BasePtr);
15240  }
15241 
15242  DCI.CombineTo(Bitcast2, FloatLoad);
15243  DCI.CombineTo(Bitcast, FloatLoad2);
15244 
15245  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, LD->isIndexed() ? 2 : 1),
15246  SDValue(FloatLoad2.getNode(), 1));
15247  return true;
15248  };
15249 
15250  if (ReplaceTwoFloatLoad())
15251  return SDValue(N, 0);
15252 
15253  EVT MemVT = LD->getMemoryVT();
15254  Type *Ty = MemVT.getTypeForEVT(*DAG.getContext());
15255  Align ABIAlignment = DAG.getDataLayout().getABITypeAlign(Ty);
15256  if (LD->isUnindexed() && VT.isVector() &&
15257  ((Subtarget.hasAltivec() && ISD::isNON_EXTLoad(N) &&
15258  // P8 and later hardware should just use LOAD.
15259  !Subtarget.hasP8Vector() &&
15260  (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
15261  VT == MVT::v4f32))) &&
15262  LD->getAlign() < ABIAlignment) {
15263  // This is a type-legal unaligned Altivec load.
15264  SDValue Chain = LD->getChain();
15265  SDValue Ptr = LD->getBasePtr();
15266  bool isLittleEndian = Subtarget.isLittleEndian();
15267 
15268  // This implements the loading of unaligned vectors as described in
15269  // the venerable Apple Velocity Engine overview. Specifically:
15270  // https://developer.apple.com/hardwaredrivers/ve/alignment.html
15271  // https://developer.apple.com/hardwaredrivers/ve/code_optimization.html
15272  //
15273  // The general idea is to expand a sequence of one or more unaligned
15274  // loads into an alignment-based permutation-control instruction (lvsl
15275  // or lvsr), a series of regular vector loads (which always truncate
15276  // their input address to an aligned address), and a series of
15277  // permutations. The results of these permutations are the requested
15278  // loaded values. The trick is that the last "extra" load is not taken
15279  // from the address you might suspect (sizeof(vector) bytes after the
15280  // last requested load), but rather sizeof(vector) - 1 bytes after the
15281  // last requested vector. The point of this is to avoid a page fault if
15282  // the base address happened to be aligned. This works because if the
15283  // base address is aligned, then adding less than a full vector length
15284  // will cause the last vector in the sequence to be (re)loaded.
15285  // Otherwise, the next vector will be fetched as you might suspect was
15286  // necessary.
15287 
15288  // We might be able to reuse the permutation generation from
15289  // a different base address offset from this one by an aligned amount.
15290  // The INTRINSIC_WO_CHAIN DAG combine will attempt to perform this
15291  // optimization later.
15292  Intrinsic::ID Intr, IntrLD, IntrPerm;
15293  MVT PermCntlTy, PermTy, LDTy;
15294  Intr = isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15295  : Intrinsic::ppc_altivec_lvsl;
15296  IntrLD = Intrinsic::ppc_altivec_lvx;
15297  IntrPerm = Intrinsic::ppc_altivec_vperm;
15298  PermCntlTy = MVT::v16i8;
15299  PermTy = MVT::v4i32;
15300  LDTy = MVT::v4i32;
15301 
15302  SDValue PermCntl = BuildIntrinsicOp(Intr, Ptr, DAG, dl, PermCntlTy);
15303 
15304  // Create the new MMO for the new base load. It is like the original MMO,
15305  // but represents an area in memory almost twice the vector size centered
15306  // on the original address. If the address is unaligned, we might start
15307  // reading up to (sizeof(vector)-1) bytes below the address of the
15308  // original unaligned load.
15309  MachineFunction &MF = DAG.getMachineFunction();
15310  MachineMemOperand *BaseMMO =
15311  MF.getMachineMemOperand(LD->getMemOperand(),
15312  -(long)MemVT.getStoreSize()+1,
15313  2*MemVT.getStoreSize()-1);
15314 
15315  // Create the new base load.
15316  SDValue LDXIntID =
15317  DAG.getTargetConstant(IntrLD, dl, getPointerTy(MF.getDataLayout()));
15318  SDValue BaseLoadOps[] = { Chain, LDXIntID, Ptr };
15319  SDValue BaseLoad =
15321  DAG.getVTList(PermTy, MVT::Other),
15322  BaseLoadOps, LDTy, BaseMMO);
15323 
15324  // Note that the value of IncOffset (which is provided to the next
15325  // load's pointer info offset value, and thus used to calculate the
15326  // alignment), and the value of IncValue (which is actually used to
15327  // increment the pointer value) are different! This is because we
15328  // require the next load to appear to be aligned, even though it
15329  // is actually offset from the base pointer by a lesser amount.
15330  int IncOffset = VT.getSizeInBits() / 8;
15331  int IncValue = IncOffset;
15332 
15333  // Walk (both up and down) the chain looking for another load at the real
15334  // (aligned) offset (the alignment of the other load does not matter in
15335  // this case). If found, then do not use the offset reduction trick, as
15336  // that will prevent the loads from being later combined (as they would
15337  // otherwise be duplicates).
15338  if (!findConsecutiveLoad(LD, DAG))
15339  --IncValue;
15340 
15341  SDValue Increment =
15342  DAG.getConstant(IncValue, dl, getPointerTy(MF.getDataLayout()));
15343  Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, Increment);
15344 
15345  MachineMemOperand *ExtraMMO =
15346  MF.getMachineMemOperand(LD->getMemOperand(),
15347  1, 2*MemVT.getStoreSize()-1);
15348  SDValue ExtraLoadOps[] = { Chain, LDXIntID, Ptr };
15349  SDValue ExtraLoad =
15351  DAG.getVTList(PermTy, MVT::Other),
15352  ExtraLoadOps, LDTy, ExtraMMO);
15353 
15355  BaseLoad.getValue(1), ExtraLoad.getValue(1));
15356 
15357  // Because vperm has a big-endian bias, we must reverse the order
15358  // of the input vectors and complement the permute control vector
15359  // when generating little endian code. We have already handled the
15360  // latter by using lvsr instead of lvsl, so just reverse BaseLoad
15361  // and ExtraLoad here.
15362  SDValue Perm;
15363  if (isLittleEndian)
15364  Perm = BuildIntrinsicOp(IntrPerm,
15365  ExtraLoad, BaseLoad, PermCntl, DAG, dl);
15366  else
15367  Perm = BuildIntrinsicOp(IntrPerm,
15368  BaseLoad, ExtraLoad, PermCntl, DAG, dl);
15369 
15370  if (VT != PermTy)
15371  Perm = Subtarget.hasAltivec()
15372  ? DAG.getNode(ISD::BITCAST, dl, VT, Perm)
15373  : DAG.getNode(ISD::FP_ROUND, dl, VT, Perm,
15374  DAG.getTargetConstant(1, dl, MVT::i64));
15375  // second argument is 1 because this rounding
15376  // is always exact.
15377 
15378  // The output of the permutation is our loaded result, the TokenFactor is
15379  // our new chain.
15380  DCI.CombineTo(N, Perm, TF);
15381  return SDValue(N, 0);
15382  }
15383  }
15384  break;
15385  case ISD::INTRINSIC_WO_CHAIN: {
15386  bool isLittleEndian = Subtarget.isLittleEndian();
15387  unsigned IID = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
15388  Intrinsic::ID Intr = (isLittleEndian ? Intrinsic::ppc_altivec_lvsr
15389  : Intrinsic::ppc_altivec_lvsl);
15390  if (IID == Intr && N->getOperand(1)->getOpcode() == ISD::ADD) {
15391  SDValue Add = N->getOperand(1);
15392 
15393  int Bits = 4 /* 16 byte alignment */;
15394 
15395  if (DAG.MaskedValueIsZero(Add->getOperand(1),
15396  APInt::getAllOnes(Bits /* alignment */)
15397  .zext(Add.getScalarValueSizeInBits()))) {
15398  SDNode *BasePtr = Add->getOperand(0).getNode();
15399  for (SDNode *U : BasePtr->uses()) {
15400  if (U->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15401  cast<ConstantSDNode>(U->getOperand(0))->getZExtValue() == IID) {
15402  // We've found another LVSL/LVSR, and this address is an aligned
15403  // multiple of that one. The results will be the same, so use the
15404  // one we've just found instead.
15405 
15406  return SDValue(U, 0);
15407  }
15408  }
15409  }
15410 
15411  if (isa<ConstantSDNode>(Add->getOperand(1))) {
15412  SDNode *BasePtr = Add->getOperand(0).getNode();
15413  for (SDNode *U : BasePtr->uses()) {
15414  if (U->getOpcode() == ISD::ADD &&
15415  isa<ConstantSDNode>(U->getOperand(1)) &&
15416  (cast<ConstantSDNode>(Add->getOperand(1))->getZExtValue() -
15417  cast<ConstantSDNode>(U->getOperand(1))->getZExtValue()) %
15418  (1ULL << Bits) ==
15419  0) {
15420  SDNode *OtherAdd = U;
15421  for (SDNode *V : OtherAdd->uses()) {
15422  if (V->getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15423  cast<ConstantSDNode>(V->getOperand(0))->getZExtValue() ==
15424  IID) {
15425  return SDValue(V, 0);
15426  }
15427  }
15428  }
15429  }
15430  }
15431  }
15432 
15433  // Combine vmaxsw/h/b(a, a's negation) to abs(a)
15434  // Expose the vabsduw/h/b opportunity for down stream
15435  if (!DCI.isAfterLegalizeDAG() && Subtarget.hasP9Altivec() &&
15436  (IID == Intrinsic::ppc_altivec_vmaxsw ||
15437  IID == Intrinsic::ppc_altivec_vmaxsh ||
15438  IID == Intrinsic::ppc_altivec_vmaxsb)) {
15439  SDValue V1 = N->getOperand(1);
15440  SDValue V2 = N->getOperand(2);
15441  if ((V1.getSimpleValueType() == MVT::v4i32 ||
15442  V1.getSimpleValueType() == MVT::v8i16 ||
15443  V1.getSimpleValueType() == MVT::v16i8) &&
15444  V1.getSimpleValueType() == V2.getSimpleValueType()) {
15445  // (0-a, a)
15446  if (V1.getOpcode() == ISD::SUB &&
15448  V1.getOperand(1) == V2) {
15449  return DAG.getNode(ISD::ABS, dl, V2.getValueType(), V2);
15450  }
15451  // (a, 0-a)
15452  if (V2.getOpcode() == ISD::SUB &&
15453  ISD::isBuildVectorAllZeros(V2.getOperand(0).getNode()) &&
15454  V2.getOperand(1) == V1) {
15455  return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15456  }
15457  // (x-y, y-x)
15458  if (V1.getOpcode() == ISD::SUB && V2.getOpcode() == ISD::SUB &&
15459  V1.getOperand(0) == V2.getOperand(1) &&
15460  V1.getOperand(1) == V2.getOperand(0)) {
15461  return DAG.getNode(ISD::ABS, dl, V1.getValueType(), V1);
15462  }
15463  }
15464  }
15465  }
15466 
15467  break;
15469  // For little endian, VSX loads require generating lxvd2x/xxswapd.
15470  // Not needed on ISA 3.0 based CPUs since we have a non-permuting load.
15471  if (Subtarget.needsSwapsForVSXMemOps()) {
15472  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15473  default:
15474  break;
15475  case Intrinsic::ppc_vsx_lxvw4x:
15476  case Intrinsic::ppc_vsx_lxvd2x:
15477  return expandVSXLoadForLE(N, DCI);
15478  }
15479  }
15480  break;
15481  case ISD::INTRINSIC_VOID:
15482  // For little endian, VSX stores require generating xxswapd/stxvd2x.
15483  // Not needed on ISA 3.0 based CPUs since we have a non-permuting store.
15484  if (Subtarget.needsSwapsForVSXMemOps()) {
15485  switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
15486  default:
15487  break;
15488  case Intrinsic::ppc_vsx_stxvw4x:
15489  case Intrinsic::ppc_vsx_stxvd2x:
15490  return expandVSXStoreForLE(N, DCI);
15491  }
15492  }
15493  break;
15494  case ISD::BSWAP: {
15495  // Turn BSWAP (LOAD) -> lhbrx/lwbrx.
15496  // For subtargets without LDBRX, we can still do better than the default
15497  // expansion even for 64-bit BSWAP (LOAD).
15498  bool Is64BitBswapOn64BitTgt =
15499  Subtarget.isPPC64() && N->getValueType(0) == MVT::i64;
15500  bool IsSingleUseNormalLd = ISD::isNormalLoad(N->getOperand(0).getNode()) &&
15501  N->getOperand(0).hasOneUse();
15502  if (IsSingleUseNormalLd &&
15503  (N->getValueType(0) == MVT::i32 || N->getValueType(0) == MVT::i16 ||
15504  (Subtarget.hasLDBRX() && Is64BitBswapOn64BitTgt))) {
15505  SDValue Load = N->getOperand(0);
15506  LoadSDNode *LD = cast<LoadSDNode>(Load);
15507  // Create the byte-swapping load.
15508  SDValue Ops[] = {
15509  LD->getChain(), // Chain
15510  LD->getBasePtr(), // Ptr
15511  DAG.getValueType(N->getValueType(0)) // VT
15512  };
15513  SDValue BSLoad =
15515  DAG.getVTList(N->getValueType(0) == MVT::i64 ?
15517  Ops, LD->getMemoryVT(), LD->getMemOperand());
15518 
15519  // If this is an i16 load, insert the truncate.
15520  SDValue ResVal = BSLoad;
15521  if (N->getValueType(0) == MVT::i16)
15522  ResVal = DAG.getNode(ISD::TRUNCATE, dl, MVT::i16, BSLoad);
15523 
15524  // First, combine the bswap away. This makes the value produced by the
15525  // load dead.
15526  DCI.CombineTo(N, ResVal);
15527 
15528  // Next, combine the load away, we give it a bogus result value but a real
15529  // chain result. The result value is dead because the bswap is dead.
15530  DCI.CombineTo(Load.getNode(), ResVal, BSLoad.getValue(1));
15531 
15532  // Return N so it doesn't get rechecked!
15533  return SDValue(N, 0);
15534  }
15535  // Convert this to two 32-bit bswap loads and a BUILD_PAIR. Do this only
15536  // before legalization so that the BUILD_PAIR is handled correctly.
15537  if (!DCI.isBeforeLegalize() || !Is64BitBswapOn64BitTgt ||
15538  !IsSingleUseNormalLd)
15539  return SDValue();
15540  LoadSDNode *LD = cast<LoadSDNode>(N->getOperand(0));
15541 
15542  // Can't split volatile or atomic loads.
15543  if (!LD->isSimple())
15544  return SDValue();
15545  SDValue BasePtr = LD->getBasePtr();
15546  SDValue Lo = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr,
15547  LD->getPointerInfo(), LD->getAlignment());
15548  Lo = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Lo);
15549  BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr,
15550  DAG.getIntPtrConstant(4, dl));
15552  LD->getMemOperand(), 4, 4);
15553  SDValue Hi = DAG.getLoad(MVT::i32, dl, LD->getChain(), BasePtr, NewMMO);
15554  Hi = DAG.getNode(ISD::BSWAP, dl, MVT::i32, Hi);
15555  SDValue Res;
15556  if (Subtarget.isLittleEndian())
15557  Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Hi, Lo);
15558  else
15559  Res = DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, Lo, Hi);
15560  SDValue TF =
15562  Hi.getOperand(0).getValue(1), Lo.getOperand(0).getValue(1));
15563  DAG.ReplaceAllUsesOfValueWith(SDValue(LD, 1), TF);
15564  return Res;
15565  }
15566  case PPCISD::VCMP:
15567  // If a VCMP_rec node already exists with exactly the same operands as this
15568  // node, use its result instead of this node (VCMP_rec computes both a CR6
15569  // and a normal output).
15570  //
15571  if (!N->getOperand(0).hasOneUse() &&
15572  !N->getOperand(1).hasOneUse() &&
15573  !N->getOperand(2).hasOneUse()) {
15574 
15575  // Scan all of the users of the LHS, looking for VCMP_rec's that match.
15576  SDNode *VCMPrecNode = nullptr;
15577 
15578  SDNode *LHSN = N->getOperand(0).getNode();
15579  for (SDNode::use_iterator UI = LHSN->use_begin(), E = LHSN->use_end();
15580  UI != E; ++UI)
15581  if (UI->getOpcode() == PPCISD::VCMP_rec &&
15582  UI->getOperand(1) == N->getOperand(1) &&
15583  UI->getOperand(2) == N->getOperand(2) &&
15584  UI->getOperand(0) == N->getOperand(0)) {
15585  VCMPrecNode = *UI;
15586  break;
15587  }
15588 
15589  // If there is no VCMP_rec node, or if the flag value has a single use,
15590  // don't transform this.
15591  if (!VCMPrecNode || VCMPrecNode->hasNUsesOfValue(0, 1))
15592  break;
15593 
15594  // Look at the (necessarily single) use of the flag value. If it has a
15595  // chain, this transformation is more complex. Note that multiple things
15596  // could use the value result, which we should ignore.
15597  SDNode *FlagUser = nullptr;
15598  for (SDNode::use_iterator UI = VCMPrecNode->use_begin();
15599  FlagUser == nullptr; ++UI) {
15600  assert(UI != VCMPrecNode->use_end() && "Didn't find user!");
15601  SDNode *User = *UI;
15602  for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
15603  if (User->getOperand(i) == SDValue(VCMPrecNode, 1)) {
15604  FlagUser = User;
15605  break;
15606  }
15607  }
15608  }
15609 
15610  // If the user is a MFOCRF instruction, we know this is safe.
15611  // Otherwise we give up for right now.
15612  if (FlagUser->getOpcode() == PPCISD::MFOCRF)
15613  return SDValue(VCMPrecNode, 0);
15614  }
15615  break;
15616  case ISD::BRCOND: {
15617  SDValue Cond = N->getOperand(1);
15618  SDValue Target = N->getOperand(2);
15619 
15620  if (Cond.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15621  cast<ConstantSDNode>(Cond.getOperand(1))->getZExtValue() ==
15622  Intrinsic::loop_decrement) {
15623 
15624  // We now need to make the intrinsic dead (it cannot be instruction
15625  // selected).
15626  DAG.ReplaceAllUsesOfValueWith(Cond.getValue(1), Cond.getOperand(0));
15627  assert(Cond.getNode()->hasOneUse() &&
15628  "Counter decrement has more than one use");
15629 
15630  return DAG.getNode(PPCISD::BDNZ, dl, MVT::Other,
15631  N->getOperand(0), Target);
15632  }
15633  }
15634  break;
15635  case ISD::BR_CC: {
15636  // If this is a branch on an altivec predicate comparison, lower this so
15637  // that we don't have to do a MFOCRF: instead, branch directly on CR6. This
15638  // lowering is done pre-legalize, because the legalizer lowers the predicate
15639  // compare down to code that is difficult to reassemble.
15640  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(1))->get();
15641  SDValue LHS = N->getOperand(2), RHS = N->getOperand(3);
15642 
15643  // Sometimes the promoted value of the intrinsic is ANDed by some non-zero
15644  // value. If so, pass-through the AND to get to the intrinsic.
15645  if (LHS.getOpcode() == ISD::AND &&
15646  LHS.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15647  cast<ConstantSDNode>(LHS.getOperand(0).getOperand(1))->getZExtValue() ==
15648  Intrinsic::loop_decrement &&
15649  isa<ConstantSDNode>(LHS.getOperand(1)) &&
15650  !isNullConstant(LHS.getOperand(1)))
15651  LHS = LHS.getOperand(0);
15652 
15653  if (LHS.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
15654  cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue() ==
15655  Intrinsic::loop_decrement &&
15656  isa<ConstantSDNode>(RHS)) {
15657  assert((CC == ISD::SETEQ || CC == ISD::SETNE) &&
15658  "Counter decrement comparison is not EQ or NE");
15659 
15660  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15661  bool isBDNZ = (CC == ISD::SETEQ && Val) ||
15662  (CC == ISD::SETNE && !Val);
15663 
15664  // We now need to make the intrinsic dead (it cannot be instruction
15665  // selected).
15666  DAG.ReplaceAllUsesOfValueWith(LHS.getValue(1), LHS.getOperand(0));
15667  assert(LHS.getNode()->hasOneUse() &&
15668  "Counter decrement has more than one use");
15669 
15670  return DAG.getNode(isBDNZ ? PPCISD::BDNZ : PPCISD::BDZ, dl, MVT::Other,
15671  N->getOperand(0), N->getOperand(4));
15672  }
15673 
15674  int CompareOpc;
15675  bool isDot;
15676 
15677  if (LHS.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
15678  isa<ConstantSDNode>(RHS) && (CC == ISD::SETEQ || CC == ISD::SETNE) &&
15679  getVectorCompareInfo(LHS, CompareOpc, isDot, Subtarget)) {
15680  assert(isDot && "Can't compare against a vector result!");
15681 
15682  // If this is a comparison against something other than 0/1, then we know
15683  // that the condition is never/always true.
15684  unsigned Val = cast<ConstantSDNode>(RHS)->getZExtValue();
15685  if (Val != 0 && Val != 1) {
15686  if (CC == ISD::SETEQ) // Cond never true, remove branch.
15687  return N->getOperand(0);
15688  // Always !=, turn it into an unconditional branch.
15689  return DAG.getNode(ISD::BR, dl, MVT::Other,
15690  N->getOperand(0), N->getOperand(4));
15691  }
15692 
15693  bool BranchOnWhenPredTrue = (CC == ISD::SETEQ) ^ (Val == 0);
15694 
15695  // Create the PPCISD altivec 'dot' comparison node.
15696  SDValue Ops[] = {
15697  LHS.getOperand(2), // LHS of compare
15698  LHS.getOperand(3), // RHS of compare
15699  DAG.getConstant(CompareOpc, dl, MVT::i32)
15700  };
15701  EVT VTs[] = { LHS.getOperand(2).getValueType(), MVT::Glue };
15702  SDValue CompNode = DAG.getNode(PPCISD::VCMP_rec, dl, VTs, Ops);
15703 
15704  // Unpack the result based on how the target uses it.
15705  PPC::Predicate CompOpc;
15706  switch (cast<ConstantSDNode>(LHS.getOperand(1))->getZExtValue()) {
15707  default: // Can't happen, don't crash on invalid number though.
15708  case 0: // Branch on the value of the EQ bit of CR6.
15709  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_EQ : PPC::PRED_NE;
15710  break;
15711  case 1: // Branch on the inverted value of the EQ bit of CR6.
15712  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_NE : PPC::PRED_EQ;
15713  break;
15714  case 2: // Branch on the value of the LT bit of CR6.
15715  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_LT : PPC::PRED_GE;
15716  break;
15717  case 3: // Branch on the inverted value of the LT bit of CR6.
15718  CompOpc = BranchOnWhenPredTrue ? PPC::PRED_GE : PPC::PRED_LT;
15719  break;
15720  }
15721 
15722  return DAG.getNode(PPCISD::COND_BRANCH, dl, MVT::Other, N->getOperand(0),
15723  DAG.getConstant(CompOpc, dl, MVT::i32),
15724  DAG.getRegister(PPC::CR6, MVT::i32),
15725  N->getOperand(4), CompNode.getValue(1));
15726  }
15727  break;
15728  }
15729  case ISD::BUILD_VECTOR:
15730  return DAGCombineBuildVector(N, DCI);
15731  case ISD::ABS:
15732  return combineABS(N, DCI);
15733  case ISD::VSELECT:
15734  return combineVSelect(N, DCI);
15735  }
15736 
15737  return SDValue();
15738 }
15739 
// Build an optimized (sdiv X, +/-2^k) sequence for PPC: a single
// PPCISD::SRA_ADDZE node (arithmetic shift right followed by addze to round
// toward zero), plus a negation when the divisor is a negated power of two.
// Newly created nodes are reported through \p Created so the DAG combiner can
// track them.
SDValue
PPCTargetLowering::BuildSDIVPow2(SDNode *N, const APInt &Divisor,
                                 SelectionDAG &DAG,
                                 SmallVectorImpl<SDNode *> &Created) const {
  // fold (sdiv X, pow2)
  EVT VT = N->getValueType(0);
  // i64 division is only handled here on a 64-bit subtarget.
  if (VT == MVT::i64 && !Subtarget.isPPC64())
    return SDValue();
  // Only i32/i64 with a (possibly negated) power-of-two divisor is supported.
  if ((VT != MVT::i32 && VT != MVT::i64) ||
      !(Divisor.isPowerOf2() || Divisor.isNegatedPowerOf2()))
    return SDValue();

  SDLoc DL(N);
  SDValue N0 = N->getOperand(0);

  bool IsNegPow2 = Divisor.isNegatedPowerOf2();
  // The shift amount is log2 of the divisor's magnitude.
  unsigned Lg2 = (IsNegPow2 ? -Divisor : Divisor).countTrailingZeros();
  SDValue ShiftAmt = DAG.getConstant(Lg2, DL, VT);

  SDValue Op = DAG.getNode(PPCISD::SRA_ADDZE, DL, VT, N0, ShiftAmt);
  Created.push_back(Op.getNode());

  // For a negative power of two, negate the shifted result (0 - Op).
  if (IsNegPow2) {
    Op = DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Op);
    Created.push_back(Op.getNode());
  }

  return Op;
}
15769 
15770 //===----------------------------------------------------------------------===//
15771 // Inline Assembly Support
15772 //===----------------------------------------------------------------------===//
15773 
// Report target-specific known-zero bits for PPC-specific nodes and
// intrinsics so the generic DAG combiner can exploit them.
void PPCTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
                                                      KnownBits &Known,
                                                      const APInt &DemandedElts,
                                                      const SelectionDAG &DAG,
                                                      unsigned Depth) const {
  Known.resetAll();
  switch (Op.getOpcode()) {
  default: break;
  case PPCISD::LBRX: {
    // lhbrx is known to have the top bits cleared out.
    if (cast<VTSDNode>(Op.getOperand(2))->getVT() == MVT::i16)
      Known.Zero = 0xFFFF0000;
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue()) {
    default: break;
    // AltiVec predicate compares produce a promoted 0/1 result, so all bits
    // above bit 0 are known zero.
    case Intrinsic::ppc_altivec_vcmpbfp_p:
    case Intrinsic::ppc_altivec_vcmpeqfp_p:
    case Intrinsic::ppc_altivec_vcmpequb_p:
    case Intrinsic::ppc_altivec_vcmpequh_p:
    case Intrinsic::ppc_altivec_vcmpequw_p:
    case Intrinsic::ppc_altivec_vcmpequd_p:
    case Intrinsic::ppc_altivec_vcmpequq_p:
    case Intrinsic::ppc_altivec_vcmpgefp_p:
    case Intrinsic::ppc_altivec_vcmpgtfp_p:
    case Intrinsic::ppc_altivec_vcmpgtsb_p:
    case Intrinsic::ppc_altivec_vcmpgtsh_p:
    case Intrinsic::ppc_altivec_vcmpgtsw_p:
    case Intrinsic::ppc_altivec_vcmpgtsd_p:
    case Intrinsic::ppc_altivec_vcmpgtsq_p:
    case Intrinsic::ppc_altivec_vcmpgtub_p:
    case Intrinsic::ppc_altivec_vcmpgtuh_p:
    case Intrinsic::ppc_altivec_vcmpgtuw_p:
    case Intrinsic::ppc_altivec_vcmpgtud_p:
    case Intrinsic::ppc_altivec_vcmpgtuq_p:
      Known.Zero = ~1U;  // All bits but the low one are known to be zero.
      break;
    }
    break;
  }
  case ISD::INTRINSIC_W_CHAIN: {
    switch (cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue()) {
    default:
      break;
    case Intrinsic::ppc_load2r:
      // Top bits are cleared for load2r (which is the same as lhbrx).
      Known.Zero = 0xFFFF0000;
      break;
    }
    break;
  }
  }
}
15828 
// Return the preferred alignment for loop bodies. On modern PPC cores we
// align hot/small loops to 32 bytes so a loop fits in a single I-cache line;
// otherwise defer to the generic TargetLowering policy.
Align PPCTargetLowering::getPrefLoopAlignment(MachineLoop *ML) const {
  switch (Subtarget.getCPUDirective()) {
  default: break;
  case PPC::DIR_970:
  case PPC::DIR_PWR4:
  case PPC::DIR_PWR5:
  case PPC::DIR_PWR5X:
  case PPC::DIR_PWR6:
  case PPC::DIR_PWR6X:
  case PPC::DIR_PWR7:
  case PPC::DIR_PWR8:
  case PPC::DIR_PWR9:
  case PPC::DIR_PWR10:
  case PPC::DIR_PWR_FUTURE: {
    // Alignment heuristics need the loop info; without it, use the default.
    if (!ML)
      break;

    if (!DisableInnermostLoopAlign32) {
      // If the nested loop is an innermost loop, prefer to a 32-byte alignment,
      // so that we can decrease cache misses and branch-prediction misses.
      // Actual alignment of the loop will depend on the hotness check and other
      // logic in alignBlocks.
      if (ML->getLoopDepth() > 1 && ML->getSubLoops().empty())
        return Align(32);
    }

    const PPCInstrInfo *TII = Subtarget.getInstrInfo();

    // For small loops (between 5 and 8 instructions), align to a 32-byte
    // boundary so that the entire loop fits in one instruction-cache line.
    uint64_t LoopSize = 0;
    for (auto I = ML->block_begin(), IE = ML->block_end(); I != IE; ++I)
      for (auto J = (*I)->begin(), JE = (*I)->end(); J != JE; ++J) {
        LoopSize += TII->getInstSizeInBytes(*J);
        // Stop summing early: anything above 32 bytes is treated the same.
        if (LoopSize > 32)
          break;
      }

    if (LoopSize > 16 && LoopSize <= 32)
      return Align(32);

    break;
  }
  }

  return TargetLowering::getPrefLoopAlignment(ML);
}
15876 
/// getConstraintType - Given a constraint, return the type of
/// constraint it is for this target.
PPCTargetLowering::ConstraintType
PPCTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default: break;
    // Single-letter GCC rs6000 register-class constraints: base regs ('b'),
    // general regs ('r'), FP regs ('f'/'d'), AltiVec regs ('v'), CR ('y').
    case 'b':
    case 'r':
    case 'f':
    case 'd':
    case 'v':
    case 'y':
      return C_RegisterClass;
    case 'Z':
      // FIXME: While Z does indicate a memory constraint, it specifically
      // indicates an r+r address (used in conjunction with the 'y' modifier
      // in the replacement string). Currently, we're forcing the base
      // register to be r0 in the asm printer (which is interpreted as zero)
      // and forming the complete address in the second register. This is
      // suboptimal.
      return C_Memory;
    }
  } else if (Constraint == "wc") { // individual CR bits.
    return C_RegisterClass;
  } else if (Constraint == "wa" || Constraint == "wd" ||
             Constraint == "wf" || Constraint == "ws" ||
             Constraint == "wi" || Constraint == "ww") {
    return C_RegisterClass; // VSX registers.
  }
  // Anything else is handled by the generic implementation.
  return TargetLowering::getConstraintType(Constraint);
}
15909 
/// Examine constraint type and operand type and determine a weight value.
/// This object must already have been set up with the operand type
/// and the current alternative constraint selected.
TargetLowering::ConstraintWeight
PPCTargetLowering::getSingleConstraintMatchWeight(
    AsmOperandInfo &info, const char *constraint) const {
  ConstraintWeight weight = CW_Invalid;
  Value *CallOperandVal = info.CallOperandVal;
  // If we don't have a value, we can't do a match,
  // but allow it at the lowest weight.
  if (!CallOperandVal)
    return CW_Default;
  Type *type = CallOperandVal->getType();

  // Look at the constraint type.
  if (StringRef(constraint) == "wc" && type->isIntegerTy(1))
    return CW_Register; // an individual CR bit.
  else if ((StringRef(constraint) == "wa" ||
            StringRef(constraint) == "wd" ||
            StringRef(constraint) == "wf") &&
           type->isVectorTy())
    return CW_Register;
  else if (StringRef(constraint) == "wi" && type->isIntegerTy(64))
    return CW_Register; // just hold 64-bit integers data.
  else if (StringRef(constraint) == "ws" && type->isDoubleTy())
    return CW_Register;
  else if (StringRef(constraint) == "ww" && type->isFloatTy())
    return CW_Register;

  // Single-letter constraints: match the operand's IR type against the
  // register class the letter denotes.
  switch (*constraint) {
  default:
    weight = TargetLowering::getSingleConstraintMatchWeight(info, constraint);
    break;
  case 'b': // base register (integer)
    if (type->isIntegerTy())
      weight = CW_Register;
    break;
  case 'f': // single-precision FP register
    if (type->isFloatTy())
      weight = CW_Register;
    break;
  case 'd': // double-precision FP register
    if (type->isDoubleTy())
      weight = CW_Register;
    break;
  case 'v': // AltiVec vector register
    if (type->isVectorTy())
      weight = CW_Register;
    break;
  case 'y': // CR register; no operand-type restriction.
    weight = CW_Register;
    break;
  case 'Z': // memory (r+r addressing)
    weight = CW_Memory;
    break;
  }
  return weight;
}
15968 
// Map an inline-asm register constraint (letter, "w*" VSX string, or an
// explicit "{regname}") to a concrete register / register class for the
// given value type.
std::pair<unsigned, const TargetRegisterClass *>
PPCTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC RS6000 Constraint Letters
    switch (Constraint[0]) {
    case 'b': // R1-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RC_NOX0RegClass);
      return std::make_pair(0U, &PPC::GPRC_NOR0RegClass);
    case 'r': // R0-R31
      if (VT == MVT::i64 && Subtarget.isPPC64())
        return std::make_pair(0U, &PPC::G8RCRegClass);
      return std::make_pair(0U, &PPC::GPRCRegClass);
    // 'd' and 'f' constraints are both defined to be "the floating point
    // registers", where one is for 32-bit and the other for 64-bit. We don't
    // really care overly much here so just give them all the same reg classes.
    case 'd':
    case 'f':
      if (Subtarget.hasSPE()) {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::GPRCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::SPERCRegClass);
      } else {
        if (VT == MVT::f32 || VT == MVT::i32)
          return std::make_pair(0U, &PPC::F4RCRegClass);
        if (VT == MVT::f64 || VT == MVT::i64)
          return std::make_pair(0U, &PPC::F8RCRegClass);
      }
      break;
    case 'v':
      if (Subtarget.hasAltivec() && VT.isVector())
        return std::make_pair(0U, &PPC::VRRCRegClass);
      else if (Subtarget.hasVSX())
        // Scalars in Altivec registers only make sense with VSX.
        return std::make_pair(0U, &PPC::VFRCRegClass);
      break;
    case 'y': // crrc
      return std::make_pair(0U, &PPC::CRRCRegClass);
    }
  } else if (Constraint == "wc" && Subtarget.useCRBits()) {
    // An individual CR bit.
    return std::make_pair(0U, &PPC::CRBITRCRegClass);
  } else if ((Constraint == "wa" || Constraint == "wd" ||
              Constraint == "wf" || Constraint == "wi") &&
             Subtarget.hasVSX()) {
    // A VSX register for either a scalar (FP) or vector. There is no
    // support for single precision scalars on subtargets prior to Power8.
    if (VT.isVector())
      return std::make_pair(0U, &PPC::VSRCRegClass);
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    return std::make_pair(0U, &PPC::VSFRCRegClass);
  } else if ((Constraint == "ws" || Constraint == "ww") && Subtarget.hasVSX()) {
    if (VT == MVT::f32 && Subtarget.hasP8Vector())
      return std::make_pair(0U, &PPC::VSSRCRegClass);
    else
      return std::make_pair(0U, &PPC::VSFRCRegClass);
  } else if (Constraint == "lr") {
    if (VT == MVT::i64)
      return std::make_pair(0U, &PPC::LR8RCRegClass);
    else
      return std::make_pair(0U, &PPC::LRRCRegClass);
  }

  // Handle special cases of physical registers that are not properly handled
  // by the base class.
  if (Constraint[0] == '{' && Constraint[Constraint.size() - 1] == '}') {
    // If we name a VSX register, we can't defer to the base class because it
    // will not recognize the correct register (their names will be VSL{0-31}
    // and V{0-31} so they won't match). So we match them here.
    if (Constraint.size() > 3 && Constraint[1] == 'v' && Constraint[2] == 's') {
      int VSNum = atoi(Constraint.data() + 3);
      assert(VSNum >= 0 && VSNum <= 63 &&
             "Attempted to access a vsr out of range");
      if (VSNum < 32)
        return std::make_pair(PPC::VSL0 + VSNum, &PPC::VSRCRegClass);
      return std::make_pair(PPC::V0 + VSNum - 32, &PPC::VSRCRegClass);
    }

    // For float registers, we can't defer to the base class as it will match
    // the SPILLTOVSRRC class.
    if (Constraint.size() > 3 && Constraint[1] == 'f') {
      int RegNum = atoi(Constraint.data() + 2);
      if (RegNum > 31 || RegNum < 0)
        report_fatal_error("Invalid floating point register number");
      if (VT == MVT::f32 || VT == MVT::i32)
        return Subtarget.hasSPE()
                   ? std::make_pair(PPC::R0 + RegNum, &PPC::GPRCRegClass)
                   : std::make_pair(PPC::F0 + RegNum, &PPC::F4RCRegClass);
      if (VT == MVT::f64 || VT == MVT::i64)
        return Subtarget.hasSPE()
                   ? std::make_pair(PPC::S0 + RegNum, &PPC::SPERCRegClass)
                   : std::make_pair(PPC::F0 + RegNum, &PPC::F8RCRegClass);
    }
  }

  std::pair<unsigned, const TargetRegisterClass *> R =
      TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);

  // r[0-9]+ are used, on PPC64, to refer to the corresponding 64-bit registers
  // (which we call X[0-9]+). If a 64-bit value has been requested, and a
  // 32-bit GPR has been selected, then 'upgrade' it to the 64-bit parent
  // register.
  // FIXME: If TargetLowering::getRegForInlineAsmConstraint could somehow use
  // the AsmName field from *RegisterInfo.td, then this would not be necessary.
  if (R.first && VT == MVT::i64 && Subtarget.isPPC64() &&
      PPC::GPRCRegClass.contains(R.first))
    return std::make_pair(TRI->getMatchingSuperReg(R.first,
                          PPC::sub_32, &PPC::G8RCRegClass),
                          &PPC::G8RCRegClass);

  // GCC accepts 'cc' as an alias for 'cr0', and we need to do the same.
  if (!R.second && StringRef("{cc}").equals_insensitive(Constraint)) {
    R.first = PPC::CR0;
    R.second = &PPC::CRRCRegClass;
  }
  // FIXME: This warning should ideally be emitted in the front end.
  // NOTE(review): the reserved range is V20-V31, so the message's "20 to 32"
  // looks like an off-by-one in the wording — consider fixing the text.
  const auto &TM = getTargetMachine();
  if (Subtarget.isAIXABI() && !TM.getAIXExtendedAltivecABI()) {
    if (((R.first >= PPC::V20 && R.first <= PPC::V31) ||
         (R.first >= PPC::VF20 && R.first <= PPC::VF31)) &&
        (R.second == &PPC::VSRCRegClass || R.second == &PPC::VSFRCRegClass))
      errs() << "warning: vector registers 20 to 32 are reserved in the "
                "default AIX AltiVec ABI and cannot be used\n";
  }

  return R;
}
16100 
/// LowerAsmOperandForConstraint - Lower the specified operand into the Ops
/// vector. If it is invalid, don't add anything to Ops.
void PPCTargetLowering::LowerAsmOperandForConstraint(SDValue Op,
                                                     std::string &Constraint,
                                                     std::vector<SDValue>&Ops,
                                                     SelectionDAG &DAG) const {
  SDValue Result;

  // Only support length 1 constraints.
  if (Constraint.length() > 1) return;

  char Letter = Constraint[0];
  switch (Letter) {
  default: break;
  // Immediate-range constraint letters (GCC rs6000 'I'..'P'): each one
  // accepts the operand only if it is a constant in the letter's range.
  case 'I':
  case 'J':
  case 'K':
  case 'L':
  case 'M':
  case 'N':
  case 'O':
  case 'P': {
    ConstantSDNode *CST = dyn_cast<ConstantSDNode>(Op);
    if (!CST) return; // Must be an immediate to match.
    SDLoc dl(Op);
    int64_t Value = CST->getSExtValue();
    EVT TCVT = MVT::i64; // All constants taken to be 64 bits so that negative
                         // numbers are printed as such.
    switch (Letter) {
    default: llvm_unreachable("Unknown constraint letter!");
    case 'I':  // "I" is a signed 16-bit constant.
      if (isInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'J':  // "J" is a constant with only the high-order 16 bits nonzero.
      if (isShiftedUInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'L':  // "L" is a signed 16-bit constant shifted left 16 bits.
      if (isShiftedInt<16, 16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'K':  // "K" is a constant with only the low-order 16 bits nonzero.
      if (isUInt<16>(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'M':  // "M" is a constant that is greater than 31.
      if (Value > 31)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'N':  // "N" is a positive constant that is an exact power of two.
      if (Value > 0 && isPowerOf2_64(Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'O':  // "O" is the constant zero.
      if (Value == 0)
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    case 'P':  // "P" is a constant whose negation is a signed 16-bit constant.
      if (isInt<16>(-Value))
        Result = DAG.getTargetConstant(Value, dl, TCVT);
      break;
    }
    break;
  }
  }

  if (Result.getNode()) {
    Ops.push_back(Result);
    return;
  }

  // Handle standard constraint letters.
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
16176 
// isLegalAddressingMode - Return true if the addressing mode represented
// by AM is legal for this target, for a load/store of the specified type.
bool PPCTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // Vector type r+i form is supported since power9 as DQ form. We don't check
  // the offset matching DQ form requirement(off % 16 == 0), because on PowerPC,
  // imm form is preferred and the offset can be adjusted to use imm form later
  // in pass PPCLoopInstrFormPrep. Also in LSR, for one LSRUse, it uses min and
  // max offset to check legal addressing mode, we should be a little aggressive
  // to contain other offsets for that LSRUse.
  if (Ty->isVectorTy() && AM.BaseOffs != 0 && !Subtarget.hasP9Vector())
    return false;

  // PPC allows a sign-extended 16-bit immediate field.
  // NOTE(review): the upper bound admits offsets up to 2^16-2, beyond the
  // signed-16-bit maximum of 32767 — presumably deliberate slack for LSR's
  // min/max-offset legality probing; confirm before tightening.
  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
    return false;

  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // PPC only support r+r,
  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed.
      return false;
    // Otherwise we have r+r or r+i.
    break;
  case 2:
    if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed.
      return false;
    // Allow 2*r as r+r.
    break;
  default:
    // No other scales are supported.
    return false;
  }

  return true;
}
16221 
// Lower llvm.returnaddress(depth). Depth 0 loads the saved LR slot of the
// current frame; a nonzero depth walks up via FRAMEADDR and loads the return
// address at the ABI-defined LR save offset of that caller frame.
SDValue PPCTargetLowering::LowerRETURNADDR(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);

  // The depth operand must be a constant; otherwise give up.
  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  SDLoc dl(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();

  // Make sure the function does not optimize away the store of the RA to
  // the stack.
  PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  FuncInfo->setLRStoreRequired();
  bool isPPC64 = Subtarget.isPPC64();
  auto PtrVT = getPointerTy(MF.getDataLayout());

  if (Depth > 0) {
    // The link register (return address) is saved in the caller's frame
    // not the callee's stack frame. So we must get the caller's frame
    // address and load the return address at the LR offset from there.
    SDValue FrameAddr =
        DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
                    LowerFRAMEADDR(Op, DAG), MachinePointerInfo());
    SDValue Offset =
        DAG.getConstant(Subtarget.getFrameLowering()->getReturnSaveOffset(), dl,
                        isPPC64 ? MVT::i64 : MVT::i32);
    return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, dl, PtrVT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Just load the return address off the stack.
  SDValue RetAddrFI = getReturnAddrFrameIndex(DAG);
  return DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), RetAddrFI,
                     MachinePointerInfo());
}
16261 
16262 SDValue PPCTargetLowering::LowerFRAMEADDR(SDValue Op,
16263  SelectionDAG &DAG) const {
16264  SDLoc dl(Op);
16265  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
16266 
16267  MachineFunction &MF = DAG.getMachineFunction();
16268  MachineFrameInfo &MFI = MF.getFrameInfo();
16269  MFI.setFrameAddressIsTaken(true);
16270 
16271  EVT PtrVT = getPointerTy(MF.getDataLayout());
16272  bool isPPC64 = PtrVT == MVT::i64;
16273 
16274  // Naked functions never have a frame pointer, and so we use r1. For all
16275  // other functions, this decision must be delayed until during PEI.
16276  unsigned FrameReg;
16277  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
16278  FrameReg = isPPC64 ? PPC::X1 : PPC::R1;
16279  else
16280  FrameReg = isPPC64 ? PPC::FP8 : PPC::FP;
16281 
16282  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg,
16283  PtrVT);
16284  while (Depth--)
16285  FrameAddr = DAG.getLoad(Op.getValueType(), dl, DAG.getEntryNode(),
16286  FrameAddr, MachinePointerInfo());
16287  return FrameAddr;
16288 }
16289 
16290 // FIXME? Maybe this could be a TableGen attribute on some registers and
16291 // this table could be generated automatically from RegInfo.
16293  const MachineFunction &MF) const {
16294  bool isPPC64 = Subtarget.isPPC64();
16295 
16296  bool is64Bit = isPPC64 && VT == LLT::scalar(64);
16297  if (!is64Bit && VT != LLT::scalar(32))
16298  report_fatal_error("Invalid register global variable type");
16299 
16301  .Case("r1", is64Bit ? PPC::X1 : PPC::R1)
16302  .Case("r2", isPPC64 ? Register() : PPC::R2)
16303  .Case("r13", (is64Bit ? PPC::X13 : PPC::R13))
16304  .Default(Register());
16305 
16306  if (Reg)
16307  return Reg;
16308  report_fatal_error("Invalid register name global variable");
16309 }
16310 
16312  // 32-bit SVR4 ABI access everything as got-indirect.
16313  if (Subtarget.is32BitELFABI())
16314  return true;
16315 
16316  // AIX accesses everything indirectly through the TOC, which is similar to
16317  // the GOT.
16318  if (Subtarget.isAIXABI())
16319  return true;
16320 
16322  // If it is small or large code model, module locals are accessed
16323  // indirectly by loading their address from .toc/.got.
16324  if (CModel == CodeModel::Small || CModel == CodeModel::Large)
16325  return true;
16326 
16327  // JumpTable and BlockAddress are accessed as got-indirect.
16328  if (isa<JumpTableSDNode>(GA) || isa<BlockAddressSDNode>(GA))
16329  return true;
16330 
16331  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(GA))
16332  return Subtarget.isGVIndirectSymbol(G->getGlobal());
16333 
16334  return false;
16335 }
16336 
16337 bool
16339  // The PowerPC target isn't yet aware of offsets.
16340  return false;
16341 }
16342 
16344  const CallInst &I,
16345  MachineFunction &MF,
16346  unsigned Intrinsic) const {
16347  switch (Intrinsic) {
16348  case Intrinsic::ppc_atomicrmw_xchg_i128:
16349  case Intrinsic::ppc_atomicrmw_add_i128:
16350  case Intrinsic::ppc_atomicrmw_sub_i128:
16351  case Intrinsic::ppc_atomicrmw_nand_i128:
16352  case Intrinsic::ppc_atomicrmw_and_i128:
16353  case Intrinsic::ppc_atomicrmw_or_i128:
16354  case Intrinsic::ppc_atomicrmw_xor_i128:
16355  case Intrinsic::ppc_cmpxchg_i128:
16357  Info.memVT = MVT::i128;
16358  Info.ptrVal = I.getArgOperand(0);
16359  Info.offset = 0;
16360  Info.align = Align(16);
16363  return true;
16364  case Intrinsic::ppc_atomic_load_i128:
16366  Info.memVT = MVT::i128;
16367  Info.ptrVal = I.getArgOperand(0);
16368  Info.offset = 0;
16369  Info.align = Align(16);
16371  return true;
16372  case Intrinsic::ppc_atomic_store_i128:
16373  Info.opc = ISD::INTRINSIC_VOID;
16374  Info.memVT = MVT::i128;
16375  Info.ptrVal = I.getArgOperand(2);
16376  Info.offset = 0;
16377  Info.align = Align(16);
16379  return true;
16380  case Intrinsic::ppc_altivec_lvx:
16381  case Intrinsic::ppc_altivec_lvxl:
16382  case Intrinsic::ppc_altivec_lvebx:
16383  case Intrinsic::ppc_altivec_lvehx:
16384  case Intrinsic::ppc_altivec_lvewx:
16385  case Intrinsic::ppc_vsx_lxvd2x:
16386  case Intrinsic::ppc_vsx_lxvw4x:
16387  case Intrinsic::ppc_vsx_lxvd2x_be:
16388  case Intrinsic::ppc_vsx_lxvw4x_be:
16389  case Intrinsic::ppc_vsx_lxvl:
16390  case Intrinsic::ppc_vsx_lxvll: {
16391  EVT VT;
16392  switch (Intrinsic) {
16393  case Intrinsic::ppc_altivec_lvebx:
16394  VT = MVT::i8;
16395  break;
16396  case Intrinsic::ppc_altivec_lvehx:
16397  VT = MVT::i16;
16398  break;
16399  case Intrinsic::ppc_altivec_lvewx:
16400  VT = MVT::i32;
16401  break;
16402  case Intrinsic::ppc_vsx_lxvd2x:
16403  case Intrinsic::ppc_vsx_lxvd2x_be:
16404  VT = MVT::v2f64;
16405  break;
16406  default:
16407  VT = MVT::v4i32;
16408  break;
16409  }
16410 
16412  Info.memVT = VT;
16413  Info.ptrVal = I.getArgOperand(0);
16414  Info.offset = -VT.getStoreSize()+1;
16415  Info.size = 2*VT.getStoreSize()-1;
16416  Info.align = Align(1);
16418  return true;
16419  }
16420  case Intrinsic::ppc_altivec_stvx:
16421  case Intrinsic::ppc_altivec_stvxl:
16422  case Intrinsic::ppc_altivec_stvebx:
16423  case Intrinsic::ppc_altivec_stvehx:
16424  case Intrinsic::ppc_altivec_stvewx:
16425  case Intrinsic::ppc_vsx_stxvd2x:
16426  case Intrinsic::ppc_vsx_stxvw4x:
16427  case Intrinsic::ppc_vsx_stxvd2x_be:
16428  case Intrinsic::ppc_vsx_stxvw4x_be:
16429  case Intrinsic::ppc_vsx_stxvl:
16430  case Intrinsic::ppc_vsx_stxvll: {
16431  EVT VT;
16432  switch (Intrinsic) {
16433  case Intrinsic::ppc_altivec_stvebx:
16434  VT = MVT::i8;
16435  break;
16436  case Intrinsic::ppc_altivec_stvehx:
16437  VT = MVT::i16;
16438  break;
16439  case Intrinsic::ppc_altivec_stvewx:
16440  VT = MVT::i32;
16441  break;
16442  case Intrinsic::ppc_vsx_stxvd2x:
16443  case Intrinsic::ppc_vsx_stxvd2x_be:
16444  VT = MVT::v2f64;
16445  break;
16446  default:
16447  VT = MVT::v4i32;
16448  break;
16449  }
16450 
16451  Info.opc = ISD::INTRINSIC_VOID;
16452  Info.memVT = VT;
16453  Info.ptrVal = I.getArgOperand(1);
16454  Info.offset = -VT.getStoreSize()+1;
16455  Info.size = 2*VT.getStoreSize()-1;
16456  Info.align = Align(1);
16458  return true;
16459  }
16460  default:
16461  break;
16462  }
16463 
16464  return false;
16465 }
16466 
16467 /// It returns EVT::Other if the type should be determined using generic
16468 /// target-independent logic.
16470  const MemOp &Op, const AttributeList &FuncAttributes) const {
16471  if (getTargetMachine().getOptLevel() != CodeGenOpt::None) {
16472  // We should use Altivec/VSX loads and stores when available. For unaligned
16473  // addresses, unaligned VSX loads are only fast starting with the P8.
16474  if (Subtarget.hasAltivec() && Op.size() >= 16 &&
16475  (Op.isAligned(Align(16)) ||
16476  ((Op.isMemset() && Subtarget.hasVSX()) || Subtarget.hasP8Vector())))
16477  return MVT::v4i32;
16478  }
16479 
16480  if (Subtarget.isPPC64()) {
16481  return MVT::i64;
16482  }
16483 
16484  return MVT::i32;
16485 }
16486 
16487 /// Returns true if it is beneficial to convert a load of a constant
16488 /// to just the constant itself.
16490  Type *Ty) const {
16491  assert(Ty->isIntegerTy());
16492 
16493  unsigned BitSize = Ty->getPrimitiveSizeInBits();
16494  return !(BitSize == 0 || BitSize > 64);
16495 }
16496 
16498  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
16499  return false;
16500  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
16501  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
16502  return NumBits1 == 64 && NumBits2 == 32;
16503 }
16504 
16506  if (!VT1.isInteger() || !VT2.isInteger())
16507  return false;
16508  unsigned NumBits1 = VT1.getSizeInBits();
16509  unsigned NumBits2 = VT2.getSizeInBits();
16510  return NumBits1 == 64 && NumBits2 == 32;
16511 }
16512 
16514  // Generally speaking, zexts are not free, but they are free when they can be
16515  // folded with other operations.
16516  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Val)) {
16517  EVT MemVT = LD->getMemoryVT();
16518  if ((MemVT == MVT::i1 || MemVT == MVT::i8 || MemVT == MVT::i16 ||
16519  (Subtarget.isPPC64() && MemVT == MVT::i32)) &&
16520  (LD->getExtensionType() == ISD::NON_EXTLOAD ||
16521  LD->getExtensionType() == ISD::ZEXTLOAD))
16522  return true;
16523  }
16524 
16525  // FIXME: Add other cases...
16526  // - 32-bit shifts with a zext to i64
16527  // - zext after ctlz, bswap, etc.
16528  // - zext after and by a constant mask
16529 
16530  return TargetLowering::isZExtFree(Val, VT2);
16531 }
16532 
16533 bool PPCTargetLowering::isFPExtFree(EVT DestVT, EVT SrcVT) const {
16534  assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() &&
16535  "invalid fpext types");
16536  // Extending to float128 is not free.
16537  if (DestVT == MVT::f128)
16538  return false;
16539  return true;
16540 }
16541 
16543  return isInt<16>(Imm) || isUInt<16>(Imm);
16544 }
16545 
16547  return isInt<16>(Imm) || isUInt<16>(Imm);
16548 }
16549 
16552  bool *Fast) const {
16553  if (DisablePPCUnaligned)
16554  return false;
16555 
16556  // PowerPC supports unaligned memory access for simple non-vector types.
16557  // Although accessing unaligned addresses is not as efficient as accessing
16558  // aligned addresses, it is generally more efficient than manual expansion,
16559  // and generally only traps for software emulation when crossing page
16560  // boundaries.
16561 
16562  if (!VT.isSimple())
16563  return false;
16564 
16565  if (VT.isFloatingPoint() && !VT.isVector() &&
16566  !Subtarget.allowsUnalignedFPAccess())
16567  return false;
16568 
16569  if (VT.getSimpleVT().isVector()) {
16570  if (Subtarget.hasVSX()) {
16571  if (VT != MVT::v2f64 && VT != MVT::v2i64 &&
16572  VT != MVT::v4f32 && VT != MVT::v4i32)
16573  return false;
16574  } else {
16575  return false;
16576  }
16577  }
16578 
16579  if (VT == MVT::ppcf128)
16580  return false;
16581 
16582  if (Fast)
16583  *Fast = true;
16584 
16585  return true;
16586 }
16587 
16589  SDValue C) const {
16590  // Check integral scalar types.
16591  if (!VT.isScalarInteger())
16592  return false;
16593  if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
16594  if (!ConstNode->getAPIntValue().isSignedIntN(64))
16595  return false;
16596  // This transformation will generate >= 2 operations. But the following
16597  // cases will generate <= 2 instructions during ISEL. So exclude them.
16598  // 1. If the constant multiplier fits 16 bits, it can be handled by one
16599  // HW instruction, ie. MULLI
16600  // 2. If the multiplier after shifted fits 16 bits, an extra shift
16601  // instruction is needed than case 1, ie. MULLI and RLDICR
16602  int64_t Imm = ConstNode->getSExtValue();
16603  unsigned Shift = countTrailingZeros<uint64_t>(Imm);
16604  Imm >>= Shift;
16605  if (isInt<16>(Imm))
16606  return false;
16607  uint64_t UImm = static_cast<uint64_t>(Imm);
16608  if (isPowerOf2_64(UImm + 1) || isPowerOf2_64(UImm - 1) ||
16609  isPowerOf2_64(1 - UImm) || isPowerOf2_64(-1 - UImm))
16610  return true;
16611  }
16612  return false;
16613 }
16614 
16616  EVT VT) const {
16618  MF.getFunction(), VT.getTypeForEVT(MF.getFunction().getContext()));
16619 }
16620 
16622  Type *Ty) const {
16623  switch (Ty->getScalarType()->getTypeID()) {
16624  case Type::FloatTyID:
16625  case Type::DoubleTyID:
16626  return true;
16627  case Type::FP128TyID:
16628  return Subtarget.hasP9Vector();
16629  default:
16630  return false;
16631  }
16632 }
16633 
16634 // FIXME: add more patterns which are not profitable to hoist.
16636  if (!I->hasOneUse())
16637  return true;
16638 
16639  Instruction *User = I->user_back();
16640  assert(User && "A single use instruction with no uses.");
16641 
16642  switch (I->getOpcode()) {
16643  case Instruction::FMul: {
16644  // Don't break FMA, PowerPC prefers FMA.
16645  if (User->getOpcode() != Instruction::FSub &&
16646  User->getOpcode() != Instruction::FAdd)
16647  return true;
16648 
16650  const Function *F = I->getFunction();
16651  const DataLayout &DL = F->getParent()->getDataLayout();
16652  Type *Ty = User->getOperand(0)->getType();
16653 
16654  return !(
16655  isFMAFasterThanFMulAndFAdd(*F, Ty) &&
16657  (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath));
16658  }
16659  case Instruction::Load: {
16660  // Don't break "store (load float*)" pattern, this pattern will be combined
16661  // to "store (load int32)" in later InstCombine pass. See function
16662  // combineLoadToOperationType. On PowerPC, loading a float point takes more
16663  // cycles than loading a 32 bit integer.
16664  LoadInst *LI = cast<LoadInst>(I);
16665  // For the loads that combineLoadToOperationType does nothing, like
16666  // ordered load, it should be profitable to hoist them.
16667  // For swifterror load, it can only be used for pointer to pointer type, so
16668  // later type check should get rid of this case.
16669  if (!LI->isUnordered())
16670  return true;
16671 
16672  if (User->getOpcode() != Instruction::Store)
16673  return true;
16674 
16675  if (I->getType()->getTypeID() != Type::FloatTyID)
16676  return true;
16677 
16678  return false;
16679  }
16680  default:
16681  return true;
16682  }
16683  return true;
16684 }
16685 
16686 const MCPhysReg *
16688  // LR is a callee-save register, but we must treat it as clobbered by any call
16689  // site. Hence we include LR in the scratch registers, which are in turn added
16690  // as implicit-defs for stackmaps and patchpoints. The same reasoning applies
16691  // to CTR, which is used by any indirect call.
16692  static const MCPhysReg ScratchRegs[] = {
16693  PPC::X12, PPC::LR8, PPC::CTR8, 0
16694  };
16695 
16696  return ScratchRegs;
16697 }
16698 
16700  const Constant *PersonalityFn) const {
16701  return Subtarget.isPPC64() ? PPC::X3 : PPC::R3;
16702 }
16703 
16705  const Constant *PersonalityFn) const {
16706  return Subtarget.isPPC64() ? PPC::X4 : PPC::R4;
16707 }
16708 
16709 bool
16711  EVT VT , unsigned DefinedValues) const {
16712  if (VT == MVT::v2i64)
16713  return Subtarget.hasDirectMove(); // Don't need stack ops with direct moves
16714 
16715  if (Subtarget.hasVSX())
16716  return true;
16717 
16718  return TargetLowering::shouldExpandBuildVectorWithShuffles(VT, DefinedValues);
16719 }
16720 
16722  if (DisableILPPref || Subtarget.enableMachineScheduler())
16724 
16725  return Sched::ILP;
16726 }
16727 
16728 // Create a fast isel object.
16729 FastISel *
16731  const TargetLibraryInfo *LibInfo) const {
16732  return PPC::createFastISel(FuncInfo, LibInfo);
16733 }
16734 
16735 // 'Inverted' means the FMA opcode after negating one multiplicand.
16736 // For example, (fma -a b c) = (fnmsub a b c)
16737 static unsigned invertFMAOpcode(unsigned Opc) {
16738  switch (Opc) {
16739  default:
16740  llvm_unreachable("Invalid FMA opcode for PowerPC!");
16741  case ISD::FMA:
16742  return PPCISD::FNMSUB;
16743  case PPCISD::FNMSUB:
16744  return ISD::FMA;
16745  }
16746 }
16747 
16749  bool LegalOps, bool OptForSize,
16750  NegatibleCost &Cost,
16751  unsigned Depth) const {
16753  return SDValue();
16754 
16755  unsigned Opc = Op.getOpcode();
16756  EVT VT = Op.getValueType();
16757  SDNodeFlags Flags = Op.getNode()->getFlags();
16758 
16759  switch (Opc) {
16760  case PPCISD::FNMSUB:
16761  if (!Op.hasOneUse() || !isTypeLegal(VT))
16762  break;
16763 
16765  SDValue N0 = Op.getOperand(0);
16766  SDValue N1 = Op.getOperand(1);
16767  SDValue N2 = Op.getOperand(2);
16768  SDLoc Loc(Op);
16769 
16771  SDValue NegN2 =
16772  getNegatedExpression(N2, DAG, LegalOps, OptForSize, N2Cost, Depth + 1);
16773 
16774  if (!NegN2)
16775  return SDValue();
16776 
16777  // (fneg (fnmsub a b c)) => (fnmsub (fneg a) b (fneg c))
16778  // (fneg (fnmsub a b c)) => (fnmsub a (fneg b) (fneg c))
16779  // These transformations may change sign of zeroes. For example,
16780  // -(-ab-(-c))=-0 while -(-(ab-c))=+0 when a=b=c=1.
16781  if (Flags.hasNoSignedZeros() || Options.NoSignedZerosFPMath) {
16782  // Try and choose the cheaper one to negate.
16784  SDValue NegN0 = getNegatedExpression(N0, DAG, LegalOps, OptForSize,
16785  N0Cost, Depth + 1);
16786 
16788  SDValue NegN1 = getNegatedExpression(N1, DAG, LegalOps, OptForSize,
16789  N1Cost, Depth + 1);
16790 
16791  if (NegN0 && N0Cost <= N1Cost) {
16792  Cost = std::min(N0Cost, N2Cost);
16793  return DAG.getNode(Opc, Loc, VT, NegN0, N1, NegN2, Flags);
16794  } else if (NegN1) {
16795  Cost = std::min(N1Cost, N2Cost);
16796  return DAG.getNode(Opc, Loc, VT, N0, NegN1, NegN2, Flags);
16797  }
16798  }
16799 
16800  // (fneg (fnmsub a b c)) => (fma a b (fneg c))
16801  if (isOperationLegal(ISD::FMA, VT)) {
16802  Cost = N2Cost;
16803  return DAG.getNode(ISD::FMA, Loc, VT, N0, N1, NegN2, Flags);
16804  }
16805 
16806  break;
16807  }
16808 
16809  return TargetLowering::getNegatedExpression(Op, DAG, LegalOps, OptForSize,
16810  Cost, Depth);
16811 }
16812 
16813 // Override to enable LOAD_STACK_GUARD lowering on Linux.
16815  if (!Subtarget.isTargetLinux())
16817  return true;
16818 }
16819 
16820 // Override to disable global variable loading on Linux and insert AIX canary
16821 // word declaration.
16823  if (Subtarget.isAIXABI()) {
16824  M.getOrInsertGlobal(AIXSSPCanaryWordName,
16825  Type::getInt8PtrTy(M.getContext()));
16826  return;
16827  }
16828  if (!Subtarget.isTargetLinux())
16830 }
16831 
16833  if (Subtarget.isAIXABI())
16834  return M.getGlobalVariable(AIXSSPCanaryWordName);
16836 }
16837 
16839  bool ForCodeSize) const {
16840  if (!VT.isSimple() || !Subtarget.hasVSX())
16841  return false;
16842 
16843  switch(VT.getSimpleVT().SimpleTy) {
16844  default:
16845  // For FP types that are currently not supported by PPC backend, return
16846  // false. Examples: f16, f80.
16847  return false;
16848  case MVT::f32:
16849  case MVT::f64:
16850  if (Subtarget.hasPrefixInstrs()) {
16851  // we can materialize all immediatess via XXSPLTI32DX and XXSPLTIDP.
16852  return true;
16853  }
16855  case MVT::ppcf128:
16856  return Imm.isPosZero();
16857  }
16858 }
16859 
16860 // For vector shift operation op, fold
16861 // (op x, (and y, ((1 << numbits(x)) - 1))) -> (target op x, y)
16863  SelectionDAG &DAG) {
16864  SDValue N0 = N->getOperand(0);
16865  SDValue N1 = N->getOperand(1);
16866  EVT VT = N0.getValueType();
16867  unsigned OpSizeInBits = VT.getScalarSizeInBits();
16868  unsigned Opcode = N->getOpcode();
16869  unsigned TargetOpcode;
16870 
16871  switch (Opcode) {
16872  default:
16873  llvm_unreachable("Unexpected shift operation");
16874  case ISD::SHL:
16875  TargetOpcode = PPCISD::SHL;
16876  break;
16877  case ISD::SRL:
16878  TargetOpcode = PPCISD::SRL;
16879  break;
16880  case ISD::SRA:
16881  TargetOpcode = PPCISD::SRA;
16882  break;
16883  }
16884 
16885  if (VT.isVector() && TLI.isOperationLegal(Opcode, VT) &&
16886  N1->getOpcode() == ISD::AND)
16888  if (Mask->getZExtValue() == OpSizeInBits - 1)
16889  return DAG.getNode(TargetOpcode, SDLoc(N), VT, N0, N1->getOperand(0));
16890 
16891  return SDValue();
16892 }
16893 
16894 SDValue PPCTargetLowering::combineSHL(SDNode *N, DAGCombinerInfo &DCI) const {
16895  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16896  return Value;
16897 
16898  SDValue N0 = N->getOperand(0);
16899  ConstantSDNode *CN1 = dyn_cast<ConstantSDNode>(N->getOperand(1));
16900  if (!Subtarget.isISA3_0() || !Subtarget.isPPC64() ||
16901  N0.getOpcode() != ISD::SIGN_EXTEND ||
16902  N0.getOperand(0).getValueType() != MVT::i32 || CN1 == nullptr ||
16903  N->getValueType(0) != MVT::i64)
16904  return SDValue();
16905 
16906  // We can't save an operation here if the value is already extended, and
16907  // the existing shift is easier to combine.
16908  SDValue ExtsSrc = N0.getOperand(0);
16909  if (ExtsSrc.getOpcode() == ISD::TRUNCATE &&
16910  ExtsSrc.getOperand(0).getOpcode() == ISD::AssertSext)
16911  return SDValue();
16912 
16913  SDLoc DL(N0);
16914  SDValue ShiftBy = SDValue(CN1, 0);
16915  // We want the shift amount to be i32 on the extswli, but the shift could
16916  // have an i64.
16917  if (ShiftBy.getValueType() == MVT::i64)
16918  ShiftBy = DCI.DAG.getConstant(CN1->getZExtValue(), DL, MVT::i32);
16919 
16920  return DCI.DAG.getNode(PPCISD::EXTSWSLI, DL, MVT::i64, N0->getOperand(0),
16921  ShiftBy);
16922 }
16923 
16924 SDValue PPCTargetLowering::combineSRA(SDNode *N, DAGCombinerInfo &DCI) const {
16925  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16926  return Value;
16927 
16928  return SDValue();
16929 }
16930 
16931 SDValue PPCTargetLowering::combineSRL(SDNode *N, DAGCombinerInfo &DCI) const {
16932  if (auto Value = stripModuloOnShift(*this, N, DCI.DAG))
16933  return Value;
16934 
16935  return SDValue();
16936 }
16937 
16938 // Transform (add X, (zext(setne Z, C))) -> (addze X, (addic (addi Z, -C), -1))
16939 // Transform (add X, (zext(sete Z, C))) -> (addze X, (subfic (addi Z, -C), 0))
16940 // When C is zero, the equation (addi Z, -C) can be simplified to Z
16941 // Requirement: -C in [-32768, 32767], X and Z are MVT::i64 types
16943  const PPCSubtarget &Subtarget) {
16944  if (!Subtarget.isPPC64())
16945  return SDValue();
16946 
16947  SDValue LHS = N->getOperand(0);
16948  SDValue RHS = N->getOperand(1);
16949 
16950  auto isZextOfCompareWithConstant = [](SDValue Op) {
16951  if (Op.getOpcode() != ISD::ZERO_EXTEND || !Op.hasOneUse() ||
16952  Op.getValueType() != MVT::i64)
16953  return false;
16954 
16955  SDValue Cmp = Op.getOperand(0);
16956  if (Cmp.getOpcode() != ISD::SETCC || !Cmp.hasOneUse() ||
16957  Cmp.getOperand(0).getValueType() != MVT::i64)
16958  return false;
16959 
16960  if (auto *Constant = dyn_cast<ConstantSDNode>(Cmp.getOperand(1))) {
16961  int64_t NegConstant = 0 - Constant->getSExtValue();
16962  // Due to the limitations of the addi instruction,
16963  // -C is required to be [-32768, 32767].
16964  return isInt<16>(NegConstant);
16965  }
16966 
16967  return false;
16968  };
16969 
16970  bool LHSHasPattern = isZextOfCompareWithConstant(LHS);
16971  bool RHSHasPattern = isZextOfCompareWithConstant(RHS);
16972 
16973  // If there is a pattern, canonicalize a zext operand to the RHS.
16974  if (LHSHasPattern && !RHSHasPattern)
16975  std::swap(LHS, RHS);
16976  else if (!LHSHasPattern && !RHSHasPattern)
16977  return SDValue();
16978 
16979  SDLoc DL(N);
16980  SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Glue);
16981  SDValue Cmp = RHS.getOperand(0);
16982  SDValue Z = Cmp.getOperand(0);
16983  auto *Constant = cast<ConstantSDNode>(Cmp.getOperand(1));
16984  int64_t NegConstant = 0 - Constant->getSExtValue();
16985 
16986  switch(cast<CondCodeSDNode>(Cmp.getOperand(2))->get()) {
16987  default: break;
16988  case ISD::SETNE: {
16989  // when C == 0
16990  // --> addze X, (addic Z, -1).carry
16991  // /
16992  // add X, (zext(setne Z, C))--
16993  // \ when -32768 <= -C <= 32767 && C != 0
16994  // --> addze X, (addic (addi Z, -C), -1).carry
16995  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
16996  DAG.getConstant(NegConstant, DL, MVT::i64));
16997  SDValue AddOrZ = NegConstant != 0 ? Add : Z;
16998  SDValue Addc = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
16999  AddOrZ, DAG.getConstant(-1ULL, DL, MVT::i64));
17000  return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17001  SDValue(Addc.getNode(), 1));
17002  }
17003  case ISD::SETEQ: {
17004  // when C == 0
17005  // --> addze X, (subfic Z, 0).carry
17006  // /
17007  // add X, (zext(sete Z, C))--
17008  // \ when -32768 <= -C <= 32767 && C != 0
17009  // --> addze X, (subfic (addi Z, -C), 0).carry
17010  SDValue Add = DAG.getNode(ISD::ADD, DL, MVT::i64, Z,
17011  DAG.getConstant(NegConstant, DL, MVT::i64));
17012  SDValue AddOrZ = NegConstant != 0 ? Add : Z;
17013  SDValue Subc = DAG.getNode(ISD::SUBC, DL, DAG.getVTList(MVT::i64, MVT::Glue),
17014  DAG.getConstant(0, DL, MVT::i64), AddOrZ);
17015  return DAG.getNode(ISD::ADDE, DL, VTs, LHS, DAG.getConstant(0, DL, MVT::i64),
17016  SDValue(Subc.getNode(), 1));
17017  }
17018  }
17019 
17020  return SDValue();
17021 }
17022 
17023 // Transform
17024 // (add C1, (MAT_PCREL_ADDR GlobalAddr+C2)) to
17025 // (MAT_PCREL_ADDR GlobalAddr+(C1+C2))
17026 // In this case both C1 and C2 must be known constants.
17027 // C1+C2 must fit into a 34 bit signed integer.
17029  const PPCSubtarget &Subtarget) {
17030  if (!Subtarget.isUsingPCRelativeCalls())
17031  return SDValue();
17032 
17033  // Check both Operand 0 and Operand 1 of the ADD node for the PCRel node.
17034  // If we find that node try to cast the Global Address and the Constant.
17035  SDValue LHS = N->getOperand(0);
17036  SDValue RHS = N->getOperand(1);
17037 
17038  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17039  std::swap(LHS, RHS);
17040 
17041  if (LHS.getOpcode() != PPCISD::MAT_PCREL_ADDR)
17042  return SDValue();
17043 
17044  // Operand zero of PPCISD::MAT_PCREL_ADDR is the GA node.
17045  GlobalAddressSDNode *GSDN = dyn_cast<GlobalAddressSDNode>(LHS.getOperand(0));
17046  ConstantSDNode* ConstNode = dyn_cast<ConstantSDNode>(RHS);
17047 
17048  // Check that both casts succeeded.
17049  if (!GSDN || !ConstNode)
17050  return SDValue();
17051 
17052  int64_t NewOffset = GSDN->getOffset() + ConstNode->getSExtValue();
17053  SDLoc DL(GSDN);
17054 
17055  // The signed int offset needs to fit in 34 bits.
17056  if (!isInt<34>(NewOffset))
17057  return SDValue();
17058 
17059  // The new global address is a copy of the old global address except
17060  // that it has the updated Offset.
17061  SDValue GA =
17062  DAG.getTargetGlobalAddress(GSDN->getGlobal(), DL, GSDN->getValueType(0),
17063  NewOffset, GSDN->getTargetFlags());
17064  SDValue MatPCRel =
17065  DAG.getNode(PPCISD::MAT_PCREL_ADDR, DL, GSDN->getValueType(0), GA);
17066  return MatPCRel;
17067 }
17068 
17069 SDValue PPCTargetLowering::combineADD(SDNode *N, DAGCombinerInfo &DCI) const {
17070  if (auto Value = combineADDToADDZE(N, DCI.DAG, Subtarget))
17071  return Value;
17072 
17073  if (auto Value = combineADDToMAT_PCREL_ADDR(N, DCI.DAG, Subtarget))
17074  return Value;
17075 
17076  return SDValue();
17077 }
17078 
17079 // Detect TRUNCATE operations on bitcasts of float128 values.
17080 // What we are looking for here is the situtation where we extract a subset
17081 // of bits from a 128 bit float.
17082 // This can be of two forms:
17083 // 1) BITCAST of f128 feeding TRUNCATE
17084 // 2) BITCAST of f128 feeding SRL (a shift) feeding TRUNCATE
17085 // The reason this is required is because we do not have a legal i128 type
17086 // and so we want to prevent having to store the f128 and then reload part
17087 // of it.
17088 SDValue PPCTargetLowering::combineTRUNCATE(SDNode *N,
17089  DAGCombinerInfo &DCI) const {
17090  // If we are using CRBits then try that first.
17091  if (Subtarget.useCRBits()) {
17092  // Check if CRBits did anything and return that if it did.
17093  if (SDValue CRTruncValue = DAGCombineTruncBoolExt(N, DCI))
17094  return CRTruncValue;
17095  }
17096 
17097  SDLoc dl(N);
17098  SDValue Op0 = N->getOperand(0);
17099 
17100  // fold (truncate (abs (sub (zext a), (zext b)))) -> (vabsd a, b)
17101  if (Subtarget.hasP9Altivec() && Op0.getOpcode() == ISD::ABS) {
17102  EVT VT = N->getValueType(0);
17103  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
17104  return SDValue();
17105  SDValue Sub = Op0.getOperand(0);
17106  if (Sub.getOpcode() == ISD::SUB) {
17107  SDValue SubOp0 = Sub.getOperand(0);
17108  SDValue SubOp1 = Sub.getOperand(1);
17109  if ((SubOp0.getOpcode() == ISD::ZERO_EXTEND) &&
17110  (SubOp1.getOpcode() == ISD::ZERO_EXTEND)) {
17111  return DCI.DAG.getNode(PPCISD::VABSD, dl, VT, SubOp0.getOperand(0),
17112  SubOp1.getOperand(0),
17113  DCI.DAG.getTargetConstant(0, dl, MVT::i32));
17114  }
17115  }
17116  }
17117 
17118  // Looking for a truncate of i128 to i64.
17119  if (Op0.getValueType() != MVT::i128 || N->getValueType(0) != MVT::i64)
17120  return SDValue();
17121 
17122  int EltToExtract = DCI.DAG.getDataLayout().isBigEndian() ? 1 : 0;
17123 
17124  // SRL feeding TRUNCATE.
17125  if (Op0.getOpcode() == ISD::SRL) {
17126  ConstantSDNode *ConstNode = dyn_cast<ConstantSDNode>(Op0.getOperand(1));
17127  // The right shift has to be by 64 bits.
17128  if (!ConstNode || ConstNode->getZExtValue() != 64)
17129  return SDValue();
17130 
17131  // Switch the element number to extract.
17132  EltToExtract = EltToExtract ? 0 : 1;
17133  // Update Op0 past the SRL.
17134  Op0 = Op0.getOperand(0);
17135  }
17136 
17137  // BITCAST feeding a TRUNCATE possibly via SRL.
17138  if (Op0.getOpcode() == ISD::BITCAST &&
17139  Op0.getValueType() == MVT::i128 &&
17140  Op0.getOperand(0).getValueType() == MVT::f128) {
17141  SDValue Bitcast = DCI.DAG.getBitcast(MVT::v2i64, Op0.getOperand(0));
17142  return DCI.DAG.getNode(
17144  DCI.DAG.getTargetConstant(EltToExtract, dl, MVT::i32));
17145  }
17146  return SDValue();
17147 }
17148 
17149 SDValue PPCTargetLowering::combineMUL(SDNode *N, DAGCombinerInfo &DCI) const {
17150  SelectionDAG &DAG = DCI.DAG;
17151 
17152  ConstantSDNode *ConstOpOrElement = isConstOrConstSplat(N->getOperand(1));
17153  if (!ConstOpOrElement)
17154  return SDValue();
17155 
17156  // An imul is usually smaller than the alternative sequence for legal type.
17157  if (DAG.getMachineFunction().getFunction().hasMinSize() &&
17158  isOperationLegal(ISD::MUL, N->getValueType(0)))
17159  return SDValue();
17160 
17161  auto IsProfitable = [this](bool IsNeg, bool IsAddOne, EVT VT) -> bool {
17162  switch (this->Subtarget.getCPUDirective()) {
17163  default:
17164  // TODO: enhance the condition for subtarget before pwr8
17165  return false;
17166  case PPC::DIR_PWR8:
17167  // type mul add shl
17168  // scalar 4 1 1
17169  // vector 7 2 2
17170  return true;
17171  case PPC::DIR_PWR9:
17172  case PPC::DIR_PWR10:
17173  case PPC::DIR_PWR_FUTURE:
17174  // type mul add shl
17175  // scalar 5 2 2
17176  // vector 7 2 2
17177 
17178  // The cycle RATIO of related operations are showed as a table above.
17179  // Because mul is 5(scalar)/7(vector), add/sub/shl are all 2 for both
17180  // scalar and vector type. For 2 instrs patterns, add/sub + shl
17181  // are 4, it is always profitable; but for 3 instrs patterns
17182  // (mul x, -(2^N + 1)) => -(add (shl x, N), x), sub + add + shl are 6.
17183  // So we should only do it for vector type.
17184  return IsAddOne && IsNeg ? VT.isVector() : true;
17185  }
17186  };
17187 
17188  EVT VT = N->getValueType(0);
17189  SDLoc DL(N);
17190 
17191  const APInt &MulAmt = ConstOpOrElement->getAPIntValue();
17192  bool IsNeg = MulAmt.isNegative();
17193  APInt MulAmtAbs = MulAmt.abs();
17194 
17195  if ((MulAmtAbs - 1).isPowerOf2()) {
17196  // (mul x, 2^N + 1) => (add (shl x, N), x)
17197  // (mul x, -(2^N + 1)) => -(add (shl x, N), x)
17198 
17199  if (!IsProfitable(IsNeg, true, VT))
17200  return SDValue();
17201 
17202  SDValue Op0 = N->getOperand(0);
17203  SDValue Op1 =
17204  DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17205  DAG.getConstant((MulAmtAbs - 1).logBase2(), DL, VT));
17206  SDValue Res = DAG.getNode(ISD::ADD, DL, VT, Op0, Op1);
17207 
17208  if (!IsNeg)
17209  return Res;
17210 
17211  return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), Res);
17212  } else if ((MulAmtAbs + 1).isPowerOf2()) {
17213  // (mul x, 2^N - 1) => (sub (shl x, N), x)
17214  // (mul x, -(2^N - 1)) => (sub x, (shl x, N))
17215 
17216  if (!IsProfitable(IsNeg, false, VT))
17217  return SDValue();
17218 
17219  SDValue Op0 = N->getOperand(0);
17220  SDValue Op1 =
17221  DAG.getNode(ISD::SHL, DL, VT, N->getOperand(0),
17222  DAG.getConstant((MulAmtAbs + 1).logBase2(), DL, VT));
17223 
17224  if (!IsNeg)
17225  return DAG.getNode(ISD::SUB, DL, VT, Op1, Op0);
17226  else
17227  return DAG.getNode(ISD::SUB, DL, VT, Op0, Op1);
17228 
17229  } else {
17230  return SDValue();
17231  }
17232 }
17233 
// Combine fma-like op (like fnmsub) with fnegs to appropriate op. Do this
// in combiner since we need to check SD flags and other subtarget features.
SDValue PPCTargetLowering::combineFMALike(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  // Operands of the FMA-like node: N0 * N1 combined with N2.
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDNodeFlags Flags = N->getFlags();
  EVT VT = N->getValueType(0);
  SelectionDAG &DAG = DCI.DAG;
  // NOTE(review): a declaration line is missing from this view here — the
  // 'Options' used below (presumably
  // "const TargetOptions &Options = getTargetMachine().Options;").
  // Confirm against the full file.
  unsigned Opc = N->getOpcode();
  // Cheaper-negation queries below take code size into account.
  bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize();
  bool LegalOps = !DCI.isBeforeLegalizeOps();
  SDLoc Loc(N);

  // Only fold when FMA of this type can actually be selected.
  if (!isOperationLegal(ISD::FMA, VT))
    return SDValue();

  // Allowing transformation to FNMSUB may change sign of zeroes when ab-c=0
  // since (fnmsub a b c)=-0 while c-ab=+0.
  if (!Flags.hasNoSignedZeros() && !Options.NoSignedZerosFPMath)
    return SDValue();

  // (fma (fneg a) b c) => (fnmsub a b c)
  // (fnmsub (fneg a) b c) => (fma a b c)
  if (SDValue NegN0 = getCheaperNegatedExpression(N0, DAG, LegalOps, CodeSize))
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, NegN0, N1, N2, Flags);

  // (fma a (fneg b) c) => (fnmsub a b c)
  // (fnmsub a (fneg b) c) => (fma a b c)
  if (SDValue NegN1 = getCheaperNegatedExpression(N1, DAG, LegalOps, CodeSize))
    return DAG.getNode(invertFMAOpcode(Opc), Loc, VT, N0, NegN1, N2, Flags);

  return SDValue();
}
17270 
17271 bool PPCTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
17272  // Only duplicate to increase tail-calls for the 64bit SysV ABIs.
17273  if (!Subtarget.is64BitELFABI())
17274  return false;
17275 
17276  // If not a tail call then no need to proceed.
17277  if (!CI->isTailCall())
17278  return false;
17279 
17280  // If sibling calls have been disabled and tail-calls aren't guaranteed
17281  // there is no reason to duplicate.
17282  auto &TM = getTargetMachine();
17283  if (!TM.Options.GuaranteedTailCallOpt && DisableSCO)
17284  return false;
17285 
17286  // Can't tail call a function called indirectly, or if it has variadic args.
17287  const Function *Callee = CI->getCalledFunction();
17288  if (!Callee || Callee->isVarArg())
17289  return false;
17290 
17291  // Make sure the callee and caller calling conventions are eligible for tco.
17292  const Function *Caller = CI->getParent()->getParent();
17293  if (!areCallingConvEligibleForTCO_64SVR4(Caller->getCallingConv(),
17294  CI->getCallingConv()))
17295  return false;
17296 
17297  // If the function is local then we have a good chance at tail-calling it
17298  return getTargetMachine().shouldAssumeDSOLocal(*Caller->getParent(), Callee);
17299 }
17300 
17301 bool PPCTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
17302  if (!Subtarget.hasVSX())
17303  return false;
17304  if (Subtarget.hasP9Vector() && VT == MVT::f128)
17305  return true;
17306  return VT == MVT::f32 || VT == MVT::f64 ||
17307  VT == MVT::v4f32 || VT == MVT::v2f64;
17308 }
17309 
17310 bool PPCTargetLowering::
17311 isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
17312  const Value *Mask = AndI.getOperand(1);
17313  // If the mask is suitable for andi. or andis. we should sink the and.
17314  if (const ConstantInt *CI = dyn_cast<ConstantInt>(Mask)) {
17315  // Can't handle constants wider than 64-bits.
17316  if (CI->getBitWidth() > 64)
17317  return false;
17318  int64_t ConstVal = CI->getZExtValue();
17319  return isUInt<16>(ConstVal) ||
17320  (isUInt<16>(ConstVal >> 16) && !(ConstVal & 0xFFFF));
17321  }
17322 
17323  // For non-constant masks, we can always use the record-form and.
17324  return true;
17325 }
17326 
// Transform (abs (sub (zext a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub (zext a), (zext_invec b))) to (vabsd a b 0)
// Transform (abs (sub (zext_invec a), (zext_invec b))) to (vabsd a b 0)
// Transform (abs (sub (zext_invec a), (zext b))) to (vabsd a b 0)
// Transform (abs (sub a, b) to (vabsd a b 1)) if a & b of type v4i32
SDValue PPCTargetLowering::combineABS(SDNode *N, DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::ABS) && "Need ABS node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");
  // vabsd* instructions exist only for these three integer vector types.
  EVT VT = N->getValueType(0);
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  if (N->getOperand(0).getOpcode() == ISD::SUB) {
    // Even for signed integers, if it's known to be positive (as signed
    // integer) due to zero-extended inputs.
    unsigned SubOpcd0 = N->getOperand(0)->getOperand(0).getOpcode();
    unsigned SubOpcd1 = N->getOperand(0)->getOperand(1).getOpcode();
    if ((SubOpcd0 == ISD::ZERO_EXTEND ||
         SubOpcd0 == ISD::ZERO_EXTEND_VECTOR_INREG) &&
        (SubOpcd1 == ISD::ZERO_EXTEND ||
         SubOpcd1 == ISD::ZERO_EXTEND_VECTOR_INREG)) {
      // Both inputs are zero-extended, so the subtraction cannot wrap;
      // emit VABSD with the "no swap" flag (last operand 0).
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(0, dl, MVT::i32));
    }

    // For type v4i32, it can be optimized with xvnegsp + vabsduw
    // (last operand 1 requests the sign-bit adjustment during selection).
    if (N->getOperand(0).getValueType() == MVT::v4i32 &&
        N->getOperand(0).hasOneUse()) {
      return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(0).getValueType(),
                         N->getOperand(0)->getOperand(0),
                         N->getOperand(0)->getOperand(1),
                         DAG.getTargetConstant(1, dl, MVT::i32));
    }
  }

  return SDValue();
}
17369 
// For type v4i32/v8i16/v16i8, transform
// from (vselect (setcc a, b, setugt), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setuge), (sub a, b), (sub b, a)) to (vabsd a, b)
// from (vselect (setcc a, b, setult), (sub b, a), (sub a, b)) to (vabsd a, b)
// from (vselect (setcc a, b, setule), (sub b, a), (sub a, b)) to (vabsd a, b)
SDValue PPCTargetLowering::combineVSelect(SDNode *N,
                                          DAGCombinerInfo &DCI) const {
  assert((N->getOpcode() == ISD::VSELECT) && "Need VSELECT node here");
  assert(Subtarget.hasP9Altivec() &&
         "Only combine this when P9 altivec supported!");

  SelectionDAG &DAG = DCI.DAG;
  SDLoc dl(N);
  SDValue Cond = N->getOperand(0);
  SDValue TrueOpnd = N->getOperand(1);
  SDValue FalseOpnd = N->getOperand(2);
  EVT VT = N->getOperand(1).getValueType();

  // The pattern requires an unsigned compare selecting between the two
  // orderings of a subtraction.
  if (Cond.getOpcode() != ISD::SETCC || TrueOpnd.getOpcode() != ISD::SUB ||
      FalseOpnd.getOpcode() != ISD::SUB)
    return SDValue();

  // ABSD only available for type v4i32/v8i16/v16i8
  if (VT != MVT::v4i32 && VT != MVT::v8i16 && VT != MVT::v16i8)
    return SDValue();

  // At least to save one more dependent computation
  if (!(Cond.hasOneUse() || TrueOpnd.hasOneUse() || FalseOpnd.hasOneUse()))
    return SDValue();

  ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();

  // Can only handle unsigned comparison here
  switch (CC) {
  default:
    return SDValue();
  case ISD::SETUGT:
  case ISD::SETUGE:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
    // Canonicalize so the check below always sees TrueOpnd = a - b:
    // the "less" conditions select the subtractions in the opposite order.
    std::swap(TrueOpnd, FalseOpnd);
    break;
  }

  SDValue CmpOpnd1 = Cond.getOperand(0);
  SDValue CmpOpnd2 = Cond.getOperand(1);

  // SETCC CmpOpnd1 CmpOpnd2 cond
  // TrueOpnd = CmpOpnd1 - CmpOpnd2
  // FalseOpnd = CmpOpnd2 - CmpOpnd1
  if (TrueOpnd.getOperand(0) == CmpOpnd1 &&
      TrueOpnd.getOperand(1) == CmpOpnd2 &&
      FalseOpnd.getOperand(0) == CmpOpnd2 &&
      FalseOpnd.getOperand(1) == CmpOpnd1) {
    return DAG.getNode(PPCISD::VABSD, dl, N->getOperand(1).getValueType(),
                       CmpOpnd1, CmpOpnd2,
                       DAG.getTargetConstant(0, dl, MVT::i32));
  }

  return SDValue();
}
17432 
17433 /// getAddrModeForFlags - Based on the set of address flags, select the most
17434 /// optimal instruction format to match by.
17435 PPC::AddrMode PPCTargetLowering::getAddrModeForFlags(unsigned Flags) const {
17436  // This is not a node we should be handling here.
17437  if (Flags == PPC::MOF_None)
17438  return PPC::AM_None;
17439  // Unaligned D-Forms are tried first, followed by the aligned D-Forms.
17440  for (auto FlagSet : AddrModesMap.at(PPC::AM_DForm))
17441  if ((Flags & FlagSet) == FlagSet)
17442  return PPC::AM_DForm;
17443  for (auto FlagSet : AddrModesMap.at(PPC::AM_DSForm))
17444  if ((Flags & FlagSet) == FlagSet)
17445  return PPC::AM_DSForm;
17446  for (auto FlagSet : AddrModesMap.at(PPC::AM_DQForm))
17447  if ((Flags & FlagSet) == FlagSet)
17448  return PPC::AM_DQForm;
17449  for (auto FlagSet : AddrModesMap.at(PPC::AM_PrefixDForm))
17450  if ((Flags & FlagSet) == FlagSet)
17451  return PPC::AM_PrefixDForm;
17452  // If no other forms are selected, return an X-Form as it is the most
17453  // general addressing mode.
17454  return PPC::AM_XForm;
17455 }
17456 
17457 /// Set alignment flags based on whether or not the Frame Index is aligned.
17458 /// Utilized when computing flags for address computation when selecting
17459 /// load and store instructions.
17460 static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet,
17461  SelectionDAG &DAG) {
17462  bool IsAdd = ((N.getOpcode() == ISD::ADD) || (N.getOpcode() == ISD::OR));
17463  FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(IsAdd ? N.getOperand(0) : N);
17464  if (!FI)
17465  return;
17466  const MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
17467  unsigned FrameIndexAlign = MFI.getObjectAlign(FI->getIndex()).value();
17468  // If this is (add $FI, $S16Imm), the alignment flags are already set
17469  // based on the immediate. We just need to clear the alignment flags
17470  // if the FI alignment is weaker.
17471  if ((FrameIndexAlign % 4) != 0)
17472  FlagSet &= ~PPC::MOF_RPlusSImm16Mult4;
17473  if ((FrameIndexAlign % 16) != 0)
17474  FlagSet &= ~PPC::MOF_RPlusSImm16Mult16;
17475  // If the address is a plain FrameIndex, set alignment flags based on
17476  // FI alignment.
17477  if (!IsAdd) {
17478  if ((FrameIndexAlign % 4) == 0)
17479  FlagSet |= PPC::MOF_RPlusSImm16Mult4;
17480  if ((FrameIndexAlign % 16) == 0)
17481  FlagSet |= PPC::MOF_RPlusSImm16Mult16;
17482  }
17483 }
17484 
/// Given a node, compute flags that are used for address computation when
/// selecting load and store instructions. The flags computed are stored in
/// FlagSet. This function distinguishes whether the node is a constant,
/// an ADD/OR of a register and an operand, or neither, and computes the
/// address flags accordingly.
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet,
                                              SelectionDAG &DAG) {
  // Set the alignment flags for the node depending on if the node is
  // 4-byte or 16-byte aligned.
  auto SetAlignFlagsForImm = [&](uint64_t Imm) {
    if ((Imm & 0x3) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult4;
    if ((Imm & 0xf) == 0)
      FlagSet |= PPC::MOF_RPlusSImm16Mult16;
  };

  if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
    // All 32-bit constants can be computed as LIS + Disp.
    const APInt &ConstImm = CN->getAPIntValue();
    if (ConstImm.isSignedIntN(32)) { // Flag to handle 32-bit constants.
      FlagSet |= PPC::MOF_AddrIsSImm32;
      SetAlignFlagsForImm(ConstImm.getZExtValue());
      setAlignFlagsForFI(N, FlagSet, DAG);
    }
    if (ConstImm.isSignedIntN(34)) // Flag to handle 34-bit constants.
      FlagSet |= PPC::MOF_RPlusSImm34;
    else // Let constant materialization handle large constants.
      FlagSet |= PPC::MOF_NotAddNorCst;
  } else if (N.getOpcode() == ISD::ADD || provablyDisjointOr(DAG, N)) {
    // This address can be represented as an addition of:
    // - Register + Imm16 (possibly a multiple of 4/16)
    // - Register + Imm34
    // - Register + PPCISD::Lo
    // - Register + Register
    // In any case, we won't have to match this as Base + Zero.
    SDValue RHS = N.getOperand(1);
    if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(RHS)) {
      const APInt &ConstImm = CN->getAPIntValue();
      if (ConstImm.isSignedIntN(16)) {
        FlagSet |= PPC::MOF_RPlusSImm16; // Signed 16-bit immediates.
        SetAlignFlagsForImm(ConstImm.getZExtValue());
        setAlignFlagsForFI(N, FlagSet, DAG);
      }
      if (ConstImm.isSignedIntN(34))
        FlagSet |= PPC::MOF_RPlusSImm34; // Signed 34-bit immediates.
      else
        FlagSet |= PPC::MOF_RPlusR; // Register.
    } else if (RHS.getOpcode() == PPCISD::Lo &&
               !cast<ConstantSDNode>(RHS.getOperand(1))->getZExtValue())
      FlagSet |= PPC::MOF_RPlusLo; // PPCISD::Lo.
    else
      FlagSet |= PPC::MOF_RPlusR;
  } else { // The address computation is not a constant or an addition.
    setAlignFlagsForFI(N, FlagSet, DAG);
    FlagSet |= PPC::MOF_NotAddNorCst;
  }
}
17541 
17542 static bool isPCRelNode(SDValue N) {
17543  return (N.getOpcode() == PPCISD::MAT_PCREL_ADDR ||
17544  isValidPCRelNode<ConstantPoolSDNode>(N) ||
17545  isValidPCRelNode<GlobalAddressSDNode>(N) ||
17546  isValidPCRelNode<JumpTableSDNode>(N) ||
17547  isValidPCRelNode<BlockAddressSDNode>(N));
17548 }
17549 
/// computeMOFlags - Given a node N and it's Parent (a MemSDNode), compute
/// the address flags of the load/store instruction that is to be matched.
unsigned PPCTargetLowering::computeMOFlags(const SDNode *Parent, SDValue N,
                                           SelectionDAG &DAG) const {
  unsigned FlagSet = PPC::MOF_None;

  // Compute subtarget flags.
  if (!Subtarget.hasP9Vector())
    FlagSet |= PPC::MOF_SubtargetBeforeP9;
  else {
    FlagSet |= PPC::MOF_SubtargetP9;
    if (Subtarget.hasPrefixInstrs())
      FlagSet |= PPC::MOF_SubtargetP10;
  }
  if (Subtarget.hasSPE())
    FlagSet |= PPC::MOF_SubtargetSPE;

  // Check if we have a PCRel node and return early.
  if ((FlagSet & PPC::MOF_SubtargetP10) && isPCRelNode(N))
    return FlagSet;

  // If the node is the paired load/store intrinsics, compute flags for
  // address computation and return early.
  unsigned ParentOp = Parent->getOpcode();
  if (Subtarget.isISA3_1() && ((ParentOp == ISD::INTRINSIC_W_CHAIN) ||
                               (ParentOp == ISD::INTRINSIC_VOID))) {
    unsigned ID = cast<ConstantSDNode>(Parent->getOperand(1))->getZExtValue();
    if ((ID == Intrinsic::ppc_vsx_lxvp) || (ID == Intrinsic::ppc_vsx_stxvp)) {
      // The pointer operand sits at a different index for the load (lxvp)
      // and the store (stxvp, which also carries the stored value).
      SDValue IntrinOp = (ID == Intrinsic::ppc_vsx_lxvp)
                             ? Parent->getOperand(2)
                             : Parent->getOperand(3);
      computeFlagsForAddressComputation(IntrinOp, FlagSet, DAG);
      FlagSet |= PPC::MOF_Vector;
      return FlagSet;
    }
  }

  // Mark this as something we don't want to handle here if it is atomic
  // or pre-increment instruction.
  if (const LSBaseSDNode *LSB = dyn_cast<LSBaseSDNode>(Parent))
    if (LSB->isIndexed())
      return PPC::MOF_None;

  // Compute in-memory type flags. This is based on if there are scalars,
  // floats or vectors.
  const MemSDNode *MN = dyn_cast<MemSDNode>(Parent);
  assert(MN && "Parent should be a MemSDNode!");
  EVT MemVT = MN->getMemoryVT();
  unsigned Size = MemVT.getSizeInBits();
  if (MemVT.isScalarInteger()) {
    assert(Size <= 128 &&
           "Not expecting scalar integers larger than 16 bytes!");
    if (Size < 32)
      FlagSet |= PPC::MOF_SubWordInt;
    else if (Size == 32)
      FlagSet |= PPC::MOF_WordInt;
    else
      FlagSet |= PPC::MOF_DoubleWordInt;
  } else if (MemVT.isVector() && !MemVT.isFloatingPoint()) { // Integer vectors.
    if (Size == 128)
      FlagSet |= PPC::MOF_Vector;
    else if (Size == 256) {
      assert(Subtarget.pairedVectorMemops() &&
             "256-bit vectors are only available when paired vector memops is "
             "enabled!");
      FlagSet |= PPC::MOF_Vector;
    } else
      llvm_unreachable("Not expecting illegal vectors!");
  } else { // Floating point type: can be scalar, f128 or vector types.
    if (Size == 32 || Size == 64)
      FlagSet |= PPC::MOF_ScalarFloat;
    else if (MemVT == MVT::f128 || MemVT.isVector())
      FlagSet |= PPC::MOF_Vector;
    else
      llvm_unreachable("Not expecting illegal scalar floats!");
  }

  // Compute flags for address computation.
  computeFlagsForAddressComputation(N, FlagSet, DAG);

  // Compute type extension flags.
  if (const LoadSDNode *LN = dyn_cast<LoadSDNode>(Parent)) {
    switch (LN->getExtensionType()) {
    case ISD::SEXTLOAD:
      FlagSet |= PPC::MOF_SExt;
      break;
    case ISD::EXTLOAD:
    case ISD::ZEXTLOAD:
      FlagSet |= PPC::MOF_ZExt;
      break;
    case ISD::NON_EXTLOAD:
      FlagSet |= PPC::MOF_NoExt;
      break;
    }
  } else
    FlagSet |= PPC::MOF_NoExt;

  // For integers, no extension is the same as zero extension.
  // We set the extension mode to zero extension so we don't have
  // to add separate entries in AddrModesMap for loads and stores.
  if (MemVT.isScalarInteger() && (FlagSet & PPC::MOF_NoExt)) {
    FlagSet |= PPC::MOF_ZExt;
    FlagSet &= ~PPC::MOF_NoExt;
  }

  // If we don't have prefixed instructions, 34-bit constants should be
  // treated as PPC::MOF_NotAddNorCst so they can match D-Forms.
  // NOTE(review): the first half of this expression is missing from this
  // view (presumably a check that PPC::MOF_SubtargetP10 is absent and a mask
  // of FlagSet) — confirm against the full file.
  bool IsNonP1034BitConst =
      FlagSet) == PPC::MOF_RPlusSImm34;
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::OR &&
      IsNonP1034BitConst)
    FlagSet |= PPC::MOF_NotAddNorCst;

  return FlagSet;
}
17666 
/// SelectForceXFormMode - Given the specified address, force it to be
/// represented as an indexed [r+r] operation (an XForm instruction).
/// NOTE(review): the first line of this function's signature and the
/// initialization of 'Mode' (presumably "PPC::AddrMode Mode = PPC::AM_XForm;")
/// are not visible in this chunk — confirm against the full file.
                                                  SDValue &Base,
                                                  SelectionDAG &DAG) const {

  int16_t ForceXFormImm = 0;
  // An OR with provably disjoint operands behaves like an ADD; use its two
  // operands directly unless the RHS is a 16-bit immediate (falling through
  // then avoids materializing the immediate in the index register).
  if (provablyDisjointOr(DAG, N) &&
      !isIntS16Immediate(N.getOperand(1), ForceXFormImm)) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
    return Mode;
  }

  // If the address is the result of an add, we will utilize the fact that the
  // address calculation includes an implicit add. However, we can reduce
  // register pressure if we do not materialize a constant just for use as the
  // index register. We only get rid of the add if it is not an add of a
  // value and a 16-bit signed constant and both have a single use.
  if (N.getOpcode() == ISD::ADD &&
      (!isIntS16Immediate(N.getOperand(1), ForceXFormImm) ||
       !N.getOperand(1).hasOneUse() || !N.getOperand(0).hasOneUse())) {
    Disp = N.getOperand(0);
    Base = N.getOperand(1);
    return Mode;
  }

  // Otherwise, use R0 as the base register.
  Disp = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                         N.getValueType());
  Base = N;

  return Mode;
}
17702 
17704  SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
17705  unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
17706  EVT ValVT = Val.getValueType();
17707  // If we are splitting a scalar integer into f64 parts (i.e. so they
17708  // can be placed into VFRC registers), we need to zero extend and
17709  // bitcast the values. This will ensure the value is placed into a
17710  // VSR using direct moves or stack operations as needed.
17711  if (PartVT == MVT::f64 &&
17712  (ValVT == MVT::i32 || ValVT == MVT::i16 || ValVT == MVT::i8)) {
17713  Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
17714  Val = DAG.getNode(ISD::BITCAST, DL, MVT::f64, Val);
17715  Parts[0] = Val;
17716  return true;
17717  }
17718  return false;
17719 }
17720 
17721 // If we happen to match to an aligned D-Form, check if the Frame Index is
17722 // adequately aligned. If it is not, reset the mode to match to X-Form.
17723 static void setXFormForUnalignedFI(SDValue N, unsigned Flags,
17724  PPC::AddrMode &Mode) {
17725  if (!isa<FrameIndexSDNode>(N))
17726  return;
17727  if ((Mode == PPC::AM_DSForm && !(Flags & PPC::MOF_RPlusSImm16Mult4)) ||
17728  (Mode == PPC::AM_DQForm && !(Flags & PPC::MOF_RPlusSImm16Mult16)))
17729  Mode = PPC::AM_XForm;
17730 }
17731 
/// SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode),
/// compute the address flags of the node, get the optimal address mode based
/// on the flags, and set the Base and Disp based on the address mode.
/// NOTE(review): the first line of this function's signature (presumably
/// "PPC::AddrMode PPCTargetLowering::SelectOptimalAddrMode(const SDNode
/// *Parent,") is not visible in this chunk — confirm against the full file.
                                              SDValue N, SDValue &Disp,
                                              SDValue &Base,
                                              SelectionDAG &DAG,
                                              MaybeAlign Align) const {
  SDLoc DL(Parent);

  // Compute the address flags.
  unsigned Flags = computeMOFlags(Parent, N, DAG);

  // Get the optimal address mode based on the Flags.
  PPC::AddrMode Mode = getAddrModeForFlags(Flags);

  // If the address mode is DS-Form or DQ-Form, check if the FI is aligned.
  // Select an X-Form load if it is not.
  setXFormForUnalignedFI(N, Flags, Mode);

  // Set the mode to PC-Relative addressing mode if we have a valid PC-Rel node.
  if ((Mode == PPC::AM_XForm) && isPCRelNode(N)) {
    assert(Subtarget.isUsingPCRelativeCalls() &&
           "Must be using PC-Relative calls when a valid PC-Relative node is "
           "present!");
    Mode = PPC::AM_PCRel;
  }

  // Set Base and Disp accordingly depending on the address mode.
  switch (Mode) {
  case PPC::AM_DForm:
  case PPC::AM_DSForm:
  case PPC::AM_DQForm: {
    // This is a register plus a 16-bit immediate. The base will be the
    // register and the displacement will be the immediate unless it
    // isn't sufficiently aligned.
    if (Flags & PPC::MOF_RPlusSImm16) {
      SDValue Op0 = N.getOperand(0);
      SDValue Op1 = N.getOperand(1);
      int16_t Imm = cast<ConstantSDNode>(Op1)->getAPIntValue().getZExtValue();
      if (!Align || isAligned(*Align, Imm)) {
        Disp = DAG.getTargetConstant(Imm, DL, N.getValueType());
        Base = Op0;
        // Frame indices need the target form plus function fix-ups.
        if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Op0)) {
          Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
          fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
        }
        break;
      }
    }
    // This is a register plus the @lo relocation. The base is the register
    // and the displacement is the global address.
    // NOTE(review): the head of an assert is missing from this view here
    // (presumably checking Disp.getOpcode() against TargetGlobalAddress/
    // TargetGlobalTLSAddress) — confirm against the full file.
    else if (Flags & PPC::MOF_RPlusLo) {
      Disp = N.getOperand(1).getOperand(0); // The global address.
             Disp.getOpcode() == ISD::TargetConstantPool ||
             Disp.getOpcode() == ISD::TargetJumpTable);
      Base = N.getOperand(0);
      break;
    }
    // This is a constant address at most 32 bits. The base will be
    // zero or load-immediate-shifted and the displacement will be
    // the low 16 bits of the address.
    else if (Flags & PPC::MOF_AddrIsSImm32) {
      auto *CN = cast<ConstantSDNode>(N);
      EVT CNType = CN->getValueType(0);
      uint64_t CNImm = CN->getZExtValue();
      // If this address fits entirely in a 16-bit sext immediate field, codegen
      // this as "d, 0".
      int16_t Imm;
      if (isIntS16Immediate(CN, Imm) && (!Align || isAligned(*Align, Imm))) {
        Disp = DAG.getTargetConstant(Imm, DL, CNType);
        Base = DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                               CNType);
        break;
      }
      // Handle 32-bit sext immediate with LIS + Addr mode.
      if ((CNType == MVT::i32 || isInt<32>(CNImm)) &&
          (!Align || isAligned(*Align, CNImm))) {
        int32_t Addr = (int32_t)CNImm;
        // Otherwise, break this down into LIS + Disp.
        Disp = DAG.getTargetConstant((int16_t)Addr, DL, MVT::i32);
        // Subtracting the sign-extended low half before shifting compensates
        // for the sign extension the displacement will undergo.
        Base =
            DAG.getTargetConstant((Addr - (int16_t)Addr) >> 16, DL, MVT::i32);
        uint32_t LIS = CNType == MVT::i32 ? PPC::LIS : PPC::LIS8;
        Base = SDValue(DAG.getMachineNode(LIS, DL, CNType, Base), 0);
        break;
      }
    }
    // Otherwise, the PPC:MOF_NotAdd flag is set. Load/Store is Non-foldable.
    Disp = DAG.getTargetConstant(0, DL, getPointerTy(DAG.getDataLayout()));
    if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N)) {
      Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      fixupFuncForFI(DAG, FI->getIndex(), N.getValueType());
    } else
      Base = N;
    break;
  }
  case PPC::AM_PrefixDForm: {
    int64_t Imm34 = 0;
    unsigned Opcode = N.getOpcode();
    if (((Opcode == ISD::ADD) || (Opcode == ISD::OR)) &&
        (isIntS34Immediate(N.getOperand(1), Imm34))) {
      // N is an Add/OR Node, and it's operand is a 34-bit signed immediate.
      Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
      if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0)))
        Base = DAG.getTargetFrameIndex(FI->getIndex(), N.getValueType());
      else
        Base = N.getOperand(0);
    } else if (isIntS34Immediate(N, Imm34)) {
      // The address is a 34-bit signed immediate.
      Disp = DAG.getTargetConstant(Imm34, DL, N.getValueType());
      Base = DAG.getRegister(PPC::ZERO8, N.getValueType());
    }
    break;
  }
  case PPC::AM_PCRel: {
    // When selecting PC-Relative instructions, "Base" is not utilized as
    // we select the address as [PC+imm].
    Disp = N;
    break;
  }
  case PPC::AM_None:
    break;
  default: { // By default, X-Form is always available to be selected.
    // When a frame index is not aligned, we also match by XForm.
    FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N);
    Base = FI ? N : N.getOperand(1);
    Disp = FI ? DAG.getRegister(Subtarget.isPPC64() ? PPC::ZERO8 : PPC::ZERO,
                                N.getValueType())
              : N.getOperand(0);
    break;
  }
  }
  return Mode;
}
17869 
// Select the calling-convention assignment function for a call or return.
// NOTE(review): the first line of this function's signature (return type,
// name, and the CC parameter) is not visible in this chunk — confirm against
// the full file.
                                           bool Return,
                                           bool IsVarArg) const {
  switch (CC) {
  case CallingConv::Cold:
    // Cold calls use a dedicated return convention; argument assignment
    // still follows the standard 64-bit ELF convention.
    return (Return ? RetCC_PPC_Cold : CC_PPC64_ELF_FIS);
  default:
    return CC_PPC64_ELF_FIS;
  }
}
17880 
17883  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
17884  if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
17887 }
17888 
17891  unsigned Size = AI->getNewValOperand()->getType()->getPrimitiveSizeInBits();
17892  if (EnableQuadwordAtomics && Subtarget.hasQuadwordAtomics() && Size == 128)
17895 }
17896 
// Map an AtomicRMW binary operation to the corresponding PPC quadword (i128)
// atomic intrinsic.
// NOTE(review): the parameter-list line of the signature is missing from this
// view (presumably "getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp
// BinOp) {") — confirm against the full file.
static Intrinsic::ID
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    return Intrinsic::ppc_atomicrmw_xchg_i128;
  case AtomicRMWInst::Add:
    return Intrinsic::ppc_atomicrmw_add_i128;
  case AtomicRMWInst::Sub:
    return Intrinsic::ppc_atomicrmw_sub_i128;
  case AtomicRMWInst::And:
    return Intrinsic::ppc_atomicrmw_and_i128;
  case AtomicRMWInst::Or:
    return Intrinsic::ppc_atomicrmw_or_i128;
  case AtomicRMWInst::Xor:
    return Intrinsic::ppc_atomicrmw_xor_i128;
  case AtomicRMWInst::Nand:
    return Intrinsic::ppc_atomicrmw_nand_i128;
  }
}
17918 
// Emit IR calling the ppc i128 atomicrmw intrinsic for a 128-bit AtomicRMW,
// splitting the increment into 64-bit halves and reassembling the result.
// NOTE(review): several lines are missing from this view: the start of the
// signature (presumably
// "Value *PPCTargetLowering::emitMaskedAtomicRMWIntrinsic("), the head of
// the first assert, and the declaration of 'RMW' (presumably an
// Intrinsic::getDeclaration call using getIntrinsicForAtomicRMWBinOp128) —
// confirm against the full file.
    IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
         "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = AlignedAddr->getType()->getPointerElementType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Type *Int64Ty = Type::getInt64Ty(M->getContext());
  // Split the 128-bit increment into low/high 64-bit halves for the call.
  Value *IncrLo = Builder.CreateTrunc(Incr, Int64Ty, "incr_lo");
  Value *IncrHi =
      Builder.CreateTrunc(Builder.CreateLShr(Incr, 64), Int64Ty, "incr_hi");
  Value *Addr =
      Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
  Value *LoHi = Builder.CreateCall(RMW, {Addr, IncrLo, IncrHi});
  // The intrinsic returns a {lo, hi} pair; reassemble it into one i128.
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
17943 
// Emit IR calling the ppc i128 cmpxchg intrinsic for a 128-bit cmpxchg,
// splitting the expected and new values into 64-bit halves, wrapping the
// call in the appropriate fences, and reassembling the loaded value.
// NOTE(review): two lines are missing from this view: the start of the
// signature (presumably
// "Value *PPCTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(") and the
// head of the first assert — confirm against the full file.
    IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
         "Only support quadword now");
  Module *M = Builder.GetInsertBlock()->getParent()->getParent();
  Type *ValTy = AlignedAddr->getType()->getPointerElementType();
  assert(ValTy->getPrimitiveSizeInBits() == 128);
  Function *IntCmpXchg =
      Intrinsic::getDeclaration(M, Intrinsic::ppc_cmpxchg_i128);
  Type *Int64Ty = Type::getInt64Ty(M->getContext());
  // Split both the expected and the replacement value into 64-bit halves.
  Value *CmpLo = Builder.CreateTrunc(CmpVal, Int64Ty, "cmp_lo");
  Value *CmpHi =
      Builder.CreateTrunc(Builder.CreateLShr(CmpVal, 64), Int64Ty, "cmp_hi");
  Value *NewLo = Builder.CreateTrunc(NewVal, Int64Ty, "new_lo");
  Value *NewHi =
      Builder.CreateTrunc(Builder.CreateLShr(NewVal, 64), Int64Ty, "new_hi");
  Value *Addr =
      Builder.CreateBitCast(AlignedAddr, Type::getInt8PtrTy(M->getContext()));
  // The intrinsic itself is not ordered; bracket it with the fences the
  // requested atomic ordering demands.
  emitLeadingFence(Builder, CI, Ord);
  Value *LoHi =
      Builder.CreateCall(IntCmpXchg, {Addr, CmpLo, CmpHi, NewLo, NewHi});
  emitTrailingFence(Builder, CI, Ord);
  // Reassemble the returned {lo, hi} pair into a single 128-bit value.
  Value *Lo = Builder.CreateExtractValue(LoHi, 0, "lo");
  Value *Hi = Builder.CreateExtractValue(LoHi, 1, "hi");
  Lo = Builder.CreateZExt(Lo, ValTy, "lo64");
  Hi = Builder.CreateZExt(Hi, ValTy, "hi64");
  return Builder.CreateOr(
      Lo, Builder.CreateShl(Hi, ConstantInt::get(ValTy, 64)), "val64");
}
return AArch64::GPR64RegClass contains(Reg)
unsigned const MachineRegisterInfo * MRI
#define Success
static const unsigned PerfectShuffleTable[6561+1]
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
unsigned Intr
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
This file declares a class to represent arbitrary precision floating point values and provide a varie...
This file implements a class to represent arbitrary precision integral constant values and operations...
static bool isLoad(int Opcode)
@ OP_COPY
Function Alias Analysis Results
assume Assume Builder
Atomic ordering constants.
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:294
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static RegisterPass< DebugifyModulePass > DM("debugify", "Attach debug info to everything")
This file defines the DenseMap class.
uint64_t Align
uint64_t Offset
uint64_t Addr
uint32_t Index
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RegName(no)
lazy value info
loop rotate
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
#define R4(n)
#define R2(n)
#define R6(n)
#define T
Module.h This file contains the declarations for the Module class.
Value * Param
uint64_t CallInst * C
This file provides None, an enumerator for use in implicit constructors of various (usually templated...
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
static cl::opt< bool > DisableSCO("disable-ppc-sco", cl::desc("disable sibling call optimization on ppc"), cl::Hidden)
static SDValue getCanonicalConstSplat(uint64_t Val, unsigned SplatSize, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
getCanonicalConstSplat - Build a canonical splat immediate of Val with an element size of SplatSize.
static unsigned getCallOpcode(PPCTargetLowering::CallFlags CFlags, const Function &Caller, const SDValue &Callee, const PPCSubtarget &Subtarget, const TargetMachine &TM, bool IsStrictFPCall=false)
static bool isGPRShadowAligned(MCPhysReg Reg, Align RequiredAlign)
static bool isConstantOrUndef(int Op, int Val)
isConstantOrUndef - Op is either an undef node or a ConstantSDNode.
static bool needStackSlotPassParameters(const PPCSubtarget &Subtarget, const SmallVectorImpl< ISD::OutputArg > &Outs)
static bool isAlternatingShuffMask(const ArrayRef< int > &Mask, int NumElts)
static Instruction * callIntrinsic(IRBuilderBase &Builder, Intrinsic::ID Id)
static SDValue addShuffleForVecExtend(SDNode *N, SelectionDAG &DAG, SDValue Input, uint64_t Elems, uint64_t CorrectElems)
static SDValue combineADDToADDZE(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool findConsecutiveLoad(LoadSDNode *LD, SelectionDAG &DAG)
static SDValue generateEquivalentSub(SDNode *N, int Size, bool Complement, bool Swap, SDLoc &DL, SelectionDAG &DAG)
This function is called when we have proved that a SETCC node can be replaced by subtraction (and oth...
static unsigned mapArgRegToOffsetAIX(unsigned Reg, const PPCFrameLowering *FL)
static bool callsShareTOCBase(const Function *Caller, SDValue Callee, const TargetMachine &TM)
static SDValue combineADDToMAT_PCREL_ADDR(SDNode *N, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static void setAlignFlagsForFI(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Set alignment flags based on whether or not the Frame Index is aligned.
static bool isTOCSaveRestoreRequired(const PPCSubtarget &Subtarget)
static cl::opt< bool > DisablePPCPreinc("disable-ppc-preinc", cl::desc("disable preincrement load/store generation on PPC"), cl::Hidden)
static bool provablyDisjointOr(SelectionDAG &DAG, const SDValue &N)
Used when computing address flags for selecting loads and stores.
static bool isFunctionGlobalAddress(SDValue Callee)
static void CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64, SDValue Arg, int SPDiff, unsigned ArgOffset, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
CalculateTailCallArgDest - Remember Argument for later processing.
static bool isPCRelNode(SDValue N)
static void LowerMemOpCallTo(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue Arg, SDValue PtrOff, int SPDiff, unsigned ArgOffset, bool isPPC64, bool isTailCall, bool isVector, SmallVectorImpl< SDValue > &MemOpChains, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments, const SDLoc &dl)
LowerMemOpCallTo - Store the argument to the stack or remember it in case of tail calls.
static bool areCallingConvEligibleForTCO_64SVR4(CallingConv::ID CallerCC, CallingConv::ID CalleeCC)
static const MCPhysReg FPR[]
FPR - The set of FP registers that should be allocated for arguments on Darwin and AIX.
static Align CalculateStackSlotAlignment(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotAlignment - Calculates the alignment of this argument on the stack.
static cl::opt< bool > UseAbsoluteJumpTables("ppc-use-absolute-jumptables", cl::desc("use absolute jump tables on ppc"), cl::Hidden)
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V, bool HasDirectMove, bool HasP8Vector)
Do we have an efficient pattern in a .td file for this node?
static void getBaseWithConstantOffset(SDValue Loc, SDValue &Base, int64_t &Offset, SelectionDAG &DAG)
static SDValue getSToVPermuted(SDValue OrigSToV, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &S)
static void setUsesTOCBasePtr(MachineFunction &MF)
static SDValue transformCallee(const SDValue &Callee, SelectionDAG &DAG, const SDLoc &dl, const PPCSubtarget &Subtarget)
static unsigned EnsureStackAlignment(const PPCFrameLowering *Lowering, unsigned NumBytes)
EnsureStackAlignment - Round stack frame size up from NumBytes to ensure minimum alignment required f...
static SDValue stripModuloOnShift(const TargetLowering &TLI, SDNode *N, SelectionDAG &DAG)
static bool hasSameArgumentList(const Function *CallerFn, const CallBase &CB)
static bool isFPExtLoad(SDValue Op)
static SDValue BuildIntrinsicOp(unsigned IID, SDValue Op, SelectionDAG &DAG, const SDLoc &dl, EVT DestVT=MVT::Other)
BuildIntrinsicOp - Return a unary operator intrinsic node with the specified intrinsic ID.
static bool isConsecutiveLSLoc(SDValue Loc, EVT VT, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static void StoreTailCallArgumentsToStackSlot(SelectionDAG &DAG, SDValue Chain, const SmallVectorImpl< TailCallArgumentInfo > &TailCallArgs, SmallVectorImpl< SDValue > &MemOpChains, const SDLoc &dl)
StoreTailCallArgumentsToStackSlot - Stores arguments to their stack slot.
static const char AIXSSPCanaryWordName[]
static void setXFormForUnalignedFI(SDValue N, unsigned Flags, PPC::AddrMode &Mode)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
static bool isConsecutiveLS(SDNode *N, LSBaseSDNode *Base, unsigned Bytes, int Dist, SelectionDAG &DAG)
static const TargetRegisterClass * getRegClassForSVT(MVT::SimpleValueType SVT, bool IsPPC64, bool HasP8Vector, bool HasVSX)
static bool isVMerge(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned LHSStart, unsigned RHSStart)
isVMerge - Common function, used to match vmrg* shuffles.
static void getLabelAccessInfo(bool IsPIC, const PPCSubtarget &Subtarget, unsigned &HiOpFlags, unsigned &LoOpFlags, const GlobalValue *GV=nullptr)
Return true if we should reference labels using a PICBase, set the HiOpFlags and LoOpFlags to the tar...
static void buildCallOperands(SmallVectorImpl< SDValue > &Ops, PPCTargetLowering::CallFlags CFlags, const SDLoc &dl, SelectionDAG &DAG, SmallVector< std::pair< unsigned, SDValue >, 8 > &RegsToPass, SDValue Glue, SDValue Chain, SDValue &Callee, int SPDiff, const PPCSubtarget &Subtarget)
static bool usePartialVectorLoads(SDNode *N, const PPCSubtarget &ST)
Returns true if we should use a direct load into vector instruction (such as lxsd or lfd),...
static void PrepareTailCall(SelectionDAG &DAG, SDValue &InFlag, SDValue &Chain, const SDLoc &dl, int SPDiff, unsigned NumBytes, SDValue LROp, SDValue FPOp, SmallVectorImpl< TailCallArgumentInfo > &TailCallArguments)
static void fixupFuncForFI(SelectionDAG &DAG, int FrameIdx, EVT VT)
static Intrinsic::ID getIntrinsicForAtomicRMWBinOp128(AtomicRMWInst::BinOp BinOp)
static SDValue convertFPToInt(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget)
static unsigned CalculateStackSlotSize(EVT ArgVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize)
CalculateStackSlotSize - Calculates the size reserved for this argument on the stack.
static cl::opt< bool > DisablePPCUnaligned("disable-ppc-unaligned", cl::desc("disable unaligned load/store generation on PPC"), cl::Hidden)
static int CalculateTailCallSPDiff(SelectionDAG &DAG, bool isTailCall, unsigned ParamSize)
CalculateTailCallSPDiff - Get the amount the stack pointer has to be adjusted to accommodate the argu...
static void fixupShuffleMaskForPermutedSToV(SmallVectorImpl< int > &ShuffV, int LHSMaxIdx, int RHSMinIdx, int RHSMaxIdx, int HalfVec, unsigned ValidLaneWidth, const PPCSubtarget &Subtarget)
static void prepareIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, const SDLoc &dl)
static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const SDLoc &dl)
GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit the specified operations t...
static cl::opt< bool > EnableQuadwordAtomics("ppc-quadword-atomics", cl::desc("enable quadword lock-free atomic operations"), cl::init(false), cl::Hidden)
static SDValue LowerLabelRef(SDValue HiPart, SDValue LoPart, bool isPIC, SelectionDAG &DAG)
static SDValue isScalarToVec(SDValue Op)
static SDValue widenVec(SelectionDAG &DAG, SDValue Vec, const SDLoc &dl)
static bool getVectorCompareInfo(SDValue Intrin, int &CompareOpc, bool &isDot, const PPCSubtarget &Subtarget)
getVectorCompareInfo - Given an intrinsic, return false if it is not a vector comparison.
static unsigned invertFMAOpcode(unsigned Opc)
static SDNode * isBLACompatibleAddress(SDValue Op, SelectionDAG &DAG)
isBLACompatibleAddress - Return the immediate to use if the specified 32-bit value is representable ...
static bool isValidSplatLoad(const PPCSubtarget &Subtarget, const SDValue &Op, unsigned &Opcode)
static bool isSignExtended(MachineInstr &MI, const PPCInstrInfo *TII)
static SDValue convertIntToFP(SDValue Op, SDValue Src, SelectionDAG &DAG, const PPCSubtarget &Subtarget, SDValue Chain=SDValue())
static int getEstimateRefinementSteps(EVT VT, const PPCSubtarget &Subtarget)
static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, SDValue Chain, SDValue OldRetAddr, SDValue OldFP, int SPDiff, const SDLoc &dl)
EmitTailCallStoreFPAndRetAddr - Move the frame pointer and return address to the appropriate stack sl...
static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT, SelectionDAG &DAG, const SDLoc &dl)
BuildVSLDOI - Return a VECTOR_SHUFFLE that is a vsldoi of the specified amount.
static SDValue combineBVZEXTLOAD(SDNode *N, SelectionDAG &DAG)
static SDValue truncateScalarIntegerArg(ISD::ArgFlagsTy Flags, EVT ValVT, SelectionDAG &DAG, SDValue ArgValue, MVT LocVT, const SDLoc &dl)
static const SDValue * getNormalLoadInput(const SDValue &Op, bool &IsPermuted)
static void computeFlagsForAddressComputation(SDValue N, unsigned &FlagSet, SelectionDAG &DAG)
Given a node, compute flags that are used for address computation when selecting load and store instr...
static cl::opt< bool > DisableInnermostLoopAlign32("disable-ppc-innermost-loop-align32", cl::desc("don't always align innermost loop to 32 bytes on ppc"), cl::Hidden)
cl::opt< bool > ANDIGlueBug
static cl::opt< bool > DisableILPPref("disable-ppc-ilp-pref", cl::desc("disable setting the node scheduling preference to ILP on PPC"), cl::Hidden)
static SDValue getOutputChainFromCallSeq(SDValue CallSeqStart)
static bool CalculateStackSlotUsed(EVT ArgVT, EVT OrigVT, ISD::ArgFlagsTy Flags, unsigned PtrByteSize, unsigned LinkageSize, unsigned ParamAreaSize, unsigned &ArgOffset, unsigned &AvailableFPRs, unsigned &AvailableVRs)
CalculateStackSlotUsed - Return whether this argument will use its stack slot (instead of being passe...
static unsigned getPPCStrictOpcode(unsigned Opc)
static void prepareDescriptorIndirectCall(SelectionDAG &DAG, SDValue &Callee, SDValue &Glue, SDValue &Chain, SDValue CallSeqStart, const CallBase *CB, const SDLoc &dl, bool hasNest, const PPCSubtarget &Subtarget)
static bool isXXBRShuffleMaskHelper(ShuffleVectorSDNode *N, int Width)
static bool isSplatBV(SDValue Op)
static SDValue combineBVOfVecSExt(SDNode *N, SelectionDAG &DAG)
static bool isFloatingPointZero(SDValue Op)
isFloatingPointZero - Return true if this is 0.0 or -0.0.
static bool isNByteElemShuffleMask(ShuffleVectorSDNode *, unsigned, int)
Check that the mask is shuffling N byte elements.
static SDValue combineBVOfConsecutiveLoads(SDNode *N, SelectionDAG &DAG)
Reduce the number of loads when building a vector.
static bool isValidPCRelNode(SDValue N)
const char LLVMTargetMachineRef LLVMPassBuilderOptionsRef Options
Module * Mod
const char LLVMTargetMachineRef TM
const DataFlowGraph & G
Definition: RDFGraph.cpp:202
ManagedStatic< detail::RecordContext > Context
Definition: Record.cpp:94
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
SI optimize exec mask operations pre RA
SI Whole Quad Mode
static bool isSplat(ArrayRef< Value * > VL)
This file contains some templates that are useful if you are working with the STL at all.
Shadow Stack GC Lowering
This file defines the SmallPtrSet class.
This file defines the SmallSet class.
static bool Enabled
Definition: Statistic.cpp:46
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
This file describes how to lower LLVM code to machine code.
This defines the Use class.
static bool is64Bit(const char *name)
Value * RHS
Value * LHS
BinaryOperator * Mul
bool isFixed(unsigned ValNo) const
Definition: PPCCCState.h:68
opStatus convert(const fltSemantics &ToSemantics, roundingMode RM, bool *losesInfo)
Definition: APFloat.cpp:4836
bool isDenormal() const
Definition: APFloat.h:1215
APInt bitcastToAPInt() const
Definition: APFloat.h:1129
bool isPosZero() const
Definition: APFloat.h:1225
Class for arbitrary precision integers.
Definition: APInt.h:75
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition: APInt.h:214
void clearBit(unsigned BitPosition)
Set a given bit to 0.
Definition: APInt.h:1358
bool isNegatedPowerOf2() const
Check if this APInt's negated value is a power of two greater than zero.
Definition: APInt.h:434
APInt zext(unsigned width) const
Zero extend to a new width.
Definition: APInt.cpp:950
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1467
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1281
APInt abs() const
Get the absolute value.
Definition: APInt.h:1682
bool isNegative() const
Determine sign of this APInt.
Definition: APInt.h:312
bool isSignedIntN(unsigned N) const
Check if this APInt has an N-bits signed integer value.
Definition: APInt.h:420
bool getBoolValue() const
Convert APInt to a boolean value.
Definition: APInt.h:452
double bitsToDouble() const
Converts APInt bits to a double.
Definition: APInt.h:1616
bool isPowerOf2() const
Check if this APInt's value is a power of two greater than zero.
Definition: APInt.h:425
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition: APInt.h:289
static APInt getHighBitsSet(unsigned numBits, unsigned hiBitsSet)
Constructs an APInt value that has the top hiBitsSet bits set.
Definition: APInt.h:279
This class represents an incoming formal argument to a Function.
Definition: Argument.h:29
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
Class to represent array types.
Definition: DerivedTypes.h:357
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:523
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:728
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:740
@ Add
*p = old + v
Definition: Instructions.h:744
@ Or
*p = old | v
Definition: Instructions.h:752
@ Sub
*p = old - v
Definition: Instructions.h:746
@ And
*p = old & v
Definition: Instructions.h:748
@ Xor
*p = old ^ v
Definition: Instructions.h:754
@ Nand
*p = ~(old & v)
Definition: Instructions.h:750
BinOp getOperation() const
Definition: Instructions.h:806
This is an SDNode representing atomic operations.
StringRef getValueAsString() const
Return the attribute's value as a string.
Definition: Attributes.cpp:301
LLVM Basic Block Representation.
Definition: BasicBlock.h:59
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:107
const BlockAddress * getBlockAddress() const
The address of a basic block.
Definition: Constants.h:848
static BranchProbability getOne()
static BranchProbability getZero()
A "pseudo-class" with methods for operating on BUILD_VECTORs.
bool isConstantSplat(APInt &SplatValue, APInt &SplatUndef, unsigned &SplatBitSize, bool &HasAnyUndefs, unsigned MinSplatBits=0, bool isBigEndian=false) const
Check if this is a constant splat, and if so, find the smallest element size that splats the vector.
CCState - This class holds information needed while lowering arguments and return values.
unsigned getFirstUnallocated(ArrayRef< MCPhysReg > Regs) const
getFirstUnallocated - Return the index of the first unallocated register in the set,...
MachineFunction & getMachineFunction() const
unsigned AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
MCRegister AllocateReg(MCPhysReg Reg)
AllocateReg - Attempt to allocate one register.
bool isVarArg() const
void addLoc(const CCValAssign &V)
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
unsigned getLocMemOffset() const
Register getLocReg() const
LocInfo getLocInfo() const
static CCValAssign getReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
bool needsCustom() const
static CCValAssign getMem(unsigned ValNo, MVT ValVT, unsigned Offset, MVT LocVT, LocInfo HTP)
bool isMemLoc() const
static CCValAssign getCustomReg(unsigned ValNo, MVT ValVT, unsigned RegNo, MVT LocVT, LocInfo HTP)
unsigned getValNo() const
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
Definition: InstrTypes.h:1176
bool isStrictFP() const
Determine if the call requires strict floating point semantics.
Definition: InstrTypes.h:1819
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1457
User::op_iterator arg_begin()
Return the iterator pointing to the beginning of the argument list.
Definition: InstrTypes.h:1318
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
Value * getCalledOperand() const
Definition: InstrTypes.h:1391
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
Definition: InstrTypes.h:1398
User::op_iterator arg_end()
Return the iterator pointing to the end of the argument list.
Definition: InstrTypes.h:1324
unsigned arg_size() const
Definition: InstrTypes.h:1341
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
ConstantFP - Floating Point Values [float, double].
Definition: Constants.h:257
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:928
uint64_t getZExtValue() const
int64_t getSExtValue() const
const APInt & getAPIntValue() const
This is an important base class in LLVM.
Definition: Constant.h:41
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
bool isLittleEndian() const
Layout endianness...
Definition: DataLayout.h:244
unsigned getLargestLegalIntTypeSizeInBits() const
Returns the size of largest legal integer type size, or 0 if none are set.
Definition: DataLayout.cpp:864
IntegerType * getIntPtrType(LLVMContext &C, unsigned AddressSpace=0) const
Returns an integer type with size at least as big as that of a pointer in the given address space.
Definition: DataLayout.cpp:842
Align getABITypeAlign(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Definition: DataLayout.cpp:829
A debug info location.
Definition: DebugLoc.h:33
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:151
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:208
This is a fast-path instruction selection class that generates poor code and doesn't support illegal ...
Definition: FastISel.h:65
FunctionLoweringInfo - This contains information that is global to a function that is used when lower...
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:661
Attribute getFnAttribute(Attribute::AttrKind Kind) const
Return the attribute for the given attribute kind.
Definition: Function.cpp:650
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:658
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:240
arg_iterator arg_begin()
Definition: Function.h:738
LLVMContext & getContext() const
getContext - Return a reference to the LLVMContext associated with this function.
Definition: Function.cpp:319
size_t arg_size() const
Definition: Function.h:771
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:624
const GlobalValue * getGlobal() const
const GlobalObject * getAliaseeObject() const
Definition: Globals.cpp:522
StringRef getSection() const
Definition: Globals.cpp:167
bool isStrongDefinitionForLinker() const
Returns true if this global's definition will be the one chosen by the linker.
Definition: GlobalValue.h:553
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:578
bool hasComdat() const
Definition: GlobalValue.h:222
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
static unsigned getNumOperandRegisters(unsigned Flag)
getNumOperandRegisters - Extract the number of registers field from the inline asm operand flag.
Definition: InlineAsm.h:344
@ Kind_RegDefEarlyClobber
Definition: InlineAsm.h:240
static unsigned getKind(unsigned Flags)
Definition: InlineAsm.h:333
const BasicBlock * getParent() const
Definition: Instruction.h:92
bool hasAtomicLoad() const
Return true if this atomic instruction loads from memory.
static LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
Base class for LoadSDNode and StoreSDNode.
An instruction for reading from memory.
Definition: Instructions.h:177
bool isUnordered() const
Definition: Instructions.h:263
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
const std::vector< LoopT * > & getSubLoops() const
Return the loops contained entirely within this loop.
Definition: LoopInfo.h:143
unsigned getLoopDepth() const
Return the nesting level of this loop.
Definition: LoopInfo.h:96
block_iterator block_end() const
Definition: LoopInfo.h:177
block_iterator block_begin() const
Definition: LoopInfo.h:176
Context object for machine code objects.
Definition: MCContext.h:72
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
MCSymbolXCOFF * getQualNameSymbol() const
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:386
StringRef getName() const
getName - Get the symbol name.
Definition: MCSymbol.h:198
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
@ INVALID_SIMPLE_VALUE_TYPE
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool isInteger() const
Return true if this is an integer or a vector integer type.
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto fixedlen_vector_valuetypes()
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
bool isScalarInteger() const
Return true if this is an integer, not including vectors.
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setFrameAddressIsTaken(bool T)
void setHasTailCall(bool V=true)
void setReturnAddressIsTaken(bool s)
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool hasVAStart() const
Returns true if the function calls the llvm.va_start intrinsic.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
Function & getFunction()
Return the LLVM function that this machine code represents.
MachineModuleInfo & getMMI() const
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
Representation of each machine instruction.
Definition: MachineInstr.h:66
@ EK_LabelDifference32
EK_LabelDifference32 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
uint64_t getSize() const
Return the size in bytes of the memory reference.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MOLoad
The memory access reads data.
@ MOInvariant
The memory access always returns the same value (or traps).
@ MOStore
The memory access writes data.
Flags getFlags() const
Return the raw flags of the source value,.
Align getAlign() const
Return the minimum known alignment in bytes of the actual memory reference.
const MCContext & getContext() const
MachineOperand class - Representation of each machine instruction operand.
static MachineOperand CreateImm(int64_t Val)
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
Register getLiveInVirtReg(MCRegister PReg) const
getLiveInVirtReg - If PReg is a live-in physical register, return the corresponding live-in virtual r...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
This SDNode is used for target intrinsics that touch memory and need an associated MachineMemOperand.
This is an abstract virtual class for memory operations.
const SDValue & getChain() const
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Align getAlign() const
const MachinePointerInfo & getPointerInfo() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
const SDValue & getBasePtr() const
unsigned getAlignment() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
GlobalValue * getNamedValue(StringRef Name) const
Return the global value in the module with the specified name, of arbitrary type.
Definition: Module.cpp:111
uint64_t getReturnSaveOffset() const
getReturnSaveOffset - Return the previous frame offset to save the return address.
uint64_t getFramePointerSaveOffset() const
getFramePointerSaveOffset - Return the previous frame offset to save the frame pointer.
unsigned getLinkageSize() const
getLinkageSize - Return the size of the PowerPC ABI linkage area.
uint64_t getTOCSaveOffset() const
getTOCSaveOffset - Return the previous frame offset to save the TOC register – 64-bit SVR4 ABI only.
PPCFunctionInfo - This class is derived from MachineFunction private PowerPC target-specific informat...
void setVarArgsNumFPR(unsigned Num)
unsigned getVarArgsNumFPR() const
void setVarArgsNumGPR(unsigned Num)
void appendParameterType(ParamType Type)
void setMinReservedArea(unsigned size)
unsigned getVarArgsNumGPR() const
unsigned getMinReservedArea() const
void setVarArgsStackOffset(int Offset)
void setVarArgsFrameIndex(int Index)
void addLiveInAttr(Register VReg, ISD::ArgFlagsTy Flags)
This function associates attributes for each live-in virtual register.
void setFramePointerSaveIndex(int Idx)
bool useLongCalls() const
Definition: PPCSubtarget.h:339
bool hasFRSQRTE() const
Definition: PPCSubtarget.h:268
bool is32BitELFABI() const
Definition: PPCSubtarget.h:370
const PPCTargetMachine & getTargetMachine() const
Definition: PPCSubtarget.h:226
const PPCFrameLowering * getFrameLowering() const override
Definition: PPCSubtarget.h:213
bool hasMMA() const
Definition: PPCSubtarget.h:289
unsigned descriptorTOCAnchorOffset() const
Definition: PPCSubtarget.h:404
bool hasFPCVT() const
Definition: PPCSubtarget.h:274
bool isAIXABI() const
Definition: PPCSubtarget.h:365
bool useSoftFloat() const
Definition: PPCSubtarget.h:245
bool use64BitRegs() const
use64BitRegs - Return true if in 64-bit mode or if we should use 64-bit registers in 32-bit mode when...
Definition: PPCSubtarget.h:254
bool hasAltivec() const
Definition: PPCSubtarget.h:275
bool allowsUnalignedFPAccess() const
Definition: PPCSubtarget.h:307
bool needsSwapsForVSXMemOps() const
Definition: PPCSubtarget.h:353
bool isPPC64() const
isPPC64 - Return true if we are generating code for 64-bit pointer mode.
bool needsTwoConstNR() const
Definition: PPCSubtarget.h:280
bool isUsingPCRelativeCalls() const
bool usesFunctionDescriptors() const
True if the ABI is descriptor based.
Definition: PPCSubtarget.h:398
bool hasQuadwordAtomics() const
Definition: PPCSubtarget.h:316
bool hasFSQRT() const
Definition: PPCSubtarget.h:265
bool hasP9Vector() const
Definition: PPCSubtarget.h:284
bool hasFRE() const
Definition: PPCSubtarget.h:266
bool hasFRSQRTES() const
Definition: PPCSubtarget.h:269
const PPCInstrInfo * getInstrInfo() const override
Definition: PPCSubtarget.h:216
MCRegister getEnvironmentPointerRegister() const
Definition: PPCSubtarget.h:416
bool hasFPU() const
Definition: PPCSubtarget.h:278
bool useCRBits() const
useCRBits - Return true if we should store and manipulate i1 values in the individual condition regis...
Definition: PPCSubtarget.h:258
bool hasRecipPrec() const
Definition: PPCSubtarget.h:270
bool hasSTFIWX() const
Definition: PPCSubtarget.h:271
bool isSVR4ABI() const
Definition: PPCSubtarget.h:366
bool hasInvariantFunctionDescriptors() const
Definition: PPCSubtarget.h:310
unsigned getCPUDirective() const
getCPUDirective - Returns the -m directive specified for the cpu.
Definition: PPCSubtarget.h:205
POPCNTDKind hasPOPCNTD() const
Definition: PPCSubtarget.h:357
bool hasEFPU2() const
Definition: PPCSubtarget.h:277
bool hasPrefixInstrs() const
Definition: PPCSubtarget.h:287
bool hasPartwordAtomics() const
Definition: PPCSubtarget.h:315
bool hasSPE() const
Definition: PPCSubtarget.h:276
bool hasLFIWAX() const
Definition: PPCSubtarget.h:272
bool isLittleEndian() const
Definition: PPCSubtarget.h:261
bool hasFCPSGN() const
Definition: PPCSubtarget.h:264
bool isTargetLinux() const
Definition: PPCSubtarget.h:363
bool hasP9Altivec() const
Definition: PPCSubtarget.h:285
MCRegister getTOCPointerRegister() const
Definition: PPCSubtarget.h:422
MCRegister getStackPointerRegister() const
Definition: PPCSubtarget.h:428
const PPCRegisterInfo * getRegisterInfo() const override
Definition: PPCSubtarget.h:223
bool has64BitSupport() const
has64BitSupport - Return true if the selected CPU supports 64-bit instructions, regardless of whether...
Definition: PPCSubtarget.h:243
bool is64BitELFABI() const
Definition: PPCSubtarget.h:369
bool hasFPRND() const
Definition: PPCSubtarget.h:273
bool isELFv2ABI() const
bool hasP8Vector() const
Definition: PPCSubtarget.h:281
bool pairedVectorMemops() const
Definition: PPCSubtarget.h:292
bool isPredictableSelectIsExpensive() const
Definition: PPCSubtarget.h:434
bool enableMachineScheduler() const override
Scheduling customization.
bool hasFRES() const
Definition: PPCSubtarget.h:267
bool isISA3_1() const
Definition: PPCSubtarget.h:338
bool hasLDBRX() const
Definition: PPCSubtarget.h:298
bool isGVIndirectSymbol(const GlobalValue *GV) const
True if the GV will be accessed via an indirect symbol.
unsigned descriptorEnvironmentPointerOffset() const
Definition: PPCSubtarget.h:410
bool isISA3_0() const
Definition: PPCSubtarget.h:337
bool hasVSX() const
Definition: PPCSubtarget.h:279
bool hasDirectMove() const
Definition: PPCSubtarget.h:317
bool hasP8Altivec() const
Definition: PPCSubtarget.h:282
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
unsigned getStackProbeSize(MachineFunction &MF) const
CCAssignFn * ccAssignFnForCall(CallingConv::ID CC, bool Return, bool IsVarArg) const
bool isTruncateFree(Type *Ty1, Type *Ty2) const override
isTruncateFree - Return true if it's free to truncate a value of type Ty1 to type Ty2.
Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const override
Perform a masked atomicrmw using a target-specific intrinsic.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool isFPExtFree(EVT DestVT, EVT SrcVT) const override
Return true if an fpext operation is free (for instance, because single-precision floating-point numb...
PPC::AddrMode SelectForceXFormMode(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
SelectForceXFormMode - Given the specified address, force it to be represented as an indexed [r+r] op...
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
const char * getTargetNodeName(unsigned Opcode) const override
getTargetNodeName() - This method returns the name of a target specific DAG node.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
MachineBasicBlock * emitProbedAlloca(MachineInstr &MI, MachineBasicBlock *MBB) const
bool isZExtFree(SDValue Val, EVT VT2) const override
Return true if zero-extending the specific node Val to type VT2 is free (either because it's implicit...
MachineBasicBlock * EmitPartwordAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, bool is8bit, unsigned Opcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const override
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
bool SelectAddressRegImm(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign EncodingAlignment) const
SelectAddressRegImm - Returns true if the address N can be represented by a base register plus a sign...
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, MachineFunction &MF, unsigned Intrinsic) const override
Given an intrinsic, checks if on the target the intrinsic will need to map to a MemIntrinsicNode (tou...
bool splitValueIntoRegisterParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, Optional< CallingConv::ID > CC) const override
Target-specific splitting of values into parts that fit a register storing a legal type.
SDValue expandVSXLoadForLE(SDNode *N, DAGCombinerInfo &DCI) const
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
ReplaceNodeResults - Replace the results of node with an illegal result type with new values built ou...
TargetLowering::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
MachineBasicBlock * EmitAtomicBinary(MachineInstr &MI, MachineBasicBlock *MBB, unsigned AtomicSize, unsigned BinOpcode, unsigned CmpOpcode=0, unsigned CmpPred=0) const
SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, SelectionDAG &DAG, SmallVectorImpl< SDNode * > &Created) const override
Targets may override this function to provide custom SDIV lowering for power-of-2 denominators.
void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known, const APInt &DemandedElts, const SelectionDAG &DAG, unsigned Depth=0) const override
Determine which of the bits specified in Mask are known to be either zero or one and return them in t...
bool SelectAddressRegRegOnly(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressRegRegOnly - Given the specified address, force it to be represented as an indexed [r+...
bool useSoftFloat() const override
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const override
Perform a masked cmpxchg using a target-specific intrinsic.
ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const override
Examine constraint string and operand type and determine a weight value.
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
getByValTypeAlignment - Return the desired alignment for ByVal aggregate function arguments in the ca...
bool enableAggressiveFMAFusion(EVT VT) const override
Return true if target always benefits from combining into FMA for a given value type.
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
bool decomposeMulByConstant(LLVMContext &Context, EVT VT, SDValue C) const override
Return true if it is profitable to transform an integer multiplication-by-constant into simpler opera...
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, Type *Ty, unsigned AS, Instruction *I=nullptr) const override
isLegalAddressingMode - Return true if the addressing mode represented by AM is legal for this target...
bool preferIncOfAddToSubOfNot(EVT VT) const override
These two forms are equivalent: sub y, (xor x, -1) add (add x, 1), y The variant with two add's is IR...
bool shouldConvertConstantLoadToIntImm(const APInt &Imm, Type *Ty) const override
Returns true if it is beneficial to convert a load of a constant to just the constant itself.
const MCPhysReg * getScratchRegisters(CallingConv::ID CC) const override
Returns a 0 terminated array of registers that can be safely used as scratch registers.
bool getPreIndexedAddressParts(SDNode *N, SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM, SelectionDAG &DAG) const override
getPreIndexedAddressParts - returns true by value, base pointer and offset pointer and addressing mod...
void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const override
LowerAsmOperandForConstraint - Lower the specified operand into the Ops vector.
bool isProfitableToHoist(Instruction *I) const override
isProfitableToHoist - Check if it is profitable to hoist instruction I to its dominator block.
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
Returns true if the target can instruction select the specified FP immediate natively.
ConstraintType getConstraintType(StringRef Constraint) const override
getConstraintType - Given a constraint, return the type of constraint it is for this target.
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue expandVSXStoreForLE(SDNode *N, DAGCombinerInfo &DCI) const
bool useLoadStackGuardNode() const override
Override to support customized stack guard loading.
bool hasInlineStackProbe(MachineFunction &MF) const override
PPCTargetLowering(const PPCTargetMachine &TM, const PPCSubtarget &STI)
bool SelectAddressRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG, MaybeAlign EncodingAlignment=None) const
SelectAddressRegReg - Given the specified address, check to see if it can be more efficiently repre...
TargetLowering::AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AddrSpace, Align Alignment=Align(1), MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *Fast=nullptr) const override
Is unaligned memory access allowed for the given type, and is it fast relative to software emulation.
bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF, EVT VT) const override
isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster than a pair of fmul and fadd i...
bool shouldExpandBuildVectorWithShuffles(EVT VT, unsigned DefinedValues) const override
bool SelectAddressRegImm34(SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG) const
Similar to the 16-bit case but for instructions that take a 34-bit displacement field (prefixed loads...
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
Register getExceptionSelectorRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception typeid on entry to a la...
bool isJumpTableRelative() const override
Register getExceptionPointerRegister(const Constant *PersonalityFn) const override
If a physical register, this returns the register that receives the exception address on entry to an ...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
LowerOperation - Provide custom lowering hooks for some operations.
PPC::AddrMode SelectOptimalAddrMode(const SDNode *Parent, SDValue N, SDValue &Disp, SDValue &Base, SelectionDAG &DAG, MaybeAlign Align) const
SelectOptimalAddrMode - Based on a node N and it's Parent (a MemSDNode), compute the address flags of...
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
bool SelectAddressPCRel(SDValue N, SDValue &Base) const
SelectAddressPCRel - Represent the specified address as pc relative to be represented as [pc+imm].
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
bool SelectAddressEVXRegReg(SDValue N, SDValue &Base, SDValue &Index, SelectionDAG &DAG) const
SelectAddressEVXRegReg - Given the specified address, check to see if it can be more efficiently re...
bool isLegalICmpImmediate(int64_t Imm) const override
isLegalICmpImmediate - Return true if the specified immediate is legal icmp immediate,...
bool isAccessedAsGotIndirect(SDValue N) const
Align getPrefLoopAlignment(MachineLoop *ML) const override
Return the preferred loop alignment.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const override
createFastISel - This method returns a target-specific FastISel object, or null if the target does no...
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Inserts in the IR a target-specific intrinsic specifying a fence.
bool isLegalAddImmediate(int64_t Imm) const override
isLegalAddImmediate - Return true if the specified immediate is legal add immediate,...
Common code between 32-bit and 64-bit PowerPC targets.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
This class provides iterator support for SDUse operands that use a specific SDNode.
Represents one node in the SelectionDAG.
void dump() const
Dump this node, for debugging.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
bool hasOneUse() const
Return true if there is exactly one use of this node.
bool isOnlyUserOf(const SDNode *N) const
Return true if this node is the only use of N.
SDNodeFlags getFlags() const
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
unsigned getNumOperands() const
Return the number of values used by this operation.
use_iterator use_begin() const
Provide iteration support to walk over all uses of an SDNode.
const SDValue & getOperand(unsigned Num) const
ArrayRef< SDUse > ops() const
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
iterator_range< use_iterator > uses()
bool hasNUsesOfValue(unsigned NUses, unsigned Value) const
Return true if there are exactly NUSES uses of the indicated value.
op_iterator op_end() const
op_iterator op_begin() const
static use_iterator use_end()
Represents a use of a SDNode.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
bool isUndef() const
bool hasOneUse() const
Return true if there is exactly one node using value ResNo of Node.
SDValue getValue(unsigned R) const
void dump() const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
unsigned getNumOperands() const
static SectionKind getMetadata()
Definition: SectionKind.h:182
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:683
SDValue getSelect(const SDLoc &DL, EVT VT, SDValue Cond, SDValue LHS, SDValue RHS)
Helper function to make it easier to build Select's if you just have operands and don't want to check...
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getSetCC(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode Cond, SDValue Chain=SDValue(), bool IsSignaling=false)
Helper function to make it easier to build SetCC's if you just have an ISD::CondCode instead of an SD...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, bool isTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=None, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:699
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Align getEVTAlign(EVT MemoryVT) const
Compute the default alignment value for the given type.
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
static constexpr unsigned MaxRecursionDepth
Definition: SelectionDAG.h:416
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:693
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:951
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
Definition: SelectionDAG.h:935
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:790
bool isSplatValue(SDValue V, const APInt &DemandedElts, APInt &UndefElts, unsigned Depth=0) const
Test whether V has a splatted value for all the demanded elements.
const TargetSubtargetInfo & getSubtarget() const
Definition: SelectionDAG.h:442
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getTargetFrameIndex(int FI, EVT VT)
Definition: SelectionDAG.h:688
SDValue getTokenFactor(const SDLoc &DL, SmallVectorImpl< SDValue > &Vals)
Creates a new TokenFactor containing Vals.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
void ReplaceAllUsesWith(SDValue From, SDValue To)
Modify anything using 'From' to use 'To' instead.
SDValue getCommutedVectorShuffle(const ShuffleVectorSDNode &SV)
Returns an ISD::VECTOR_SHUFFLE node semantically equivalent to the shuffle node in input but with swa...
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
Definition: SelectionDAG.h:923
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either sign-extending or trunca...
SDValue getBoolExtOrTrunc(SDValue Op, const SDLoc &SL, EVT VT, EVT OpVT)
Convert Op, which must be of integer type, to the integer type VT, by using an extension appropriate ...
SDValue getExternalSymbol(const char *Sym, EVT VT)
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:440
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:437
SDValue getAnyExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either any-extending or truncat...
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:735
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
const TargetLowering & getTargetLoweringInfo() const
Definition: SelectionDAG.h:443
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
unsigned ComputeNumSignBits(SDValue Op, unsigned Depth=0) const
Return the number of times the sign bit of the register is replicated into the other bits.
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:730
bool isBaseWithConstantOffset(SDValue Op) const
Return true if the specified operand is an ISD::ADD with a ConstantSDNode on the right-hand side,...
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:761
SDValue getSplatBuildVector(EVT VT, const SDLoc &DL, SDValue Op)
Return a splat ISD::BUILD_VECTOR node, consisting of Op splatted to all elements.
Definition: SelectionDAG.h:807
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
KnownBits computeKnownBits(SDValue Op, unsigned Depth=0) const
Determine which bits of Op are known to be either zero or one and return them in Known.
SDValue getRegisterMask(const uint32_t *RegMask)
SDValue getZExtOrTrunc(SDValue Op, const SDLoc &DL, EVT VT)
Convert Op, which must be of integer type, to the integer type VT, by either zero-extending or trunca...
SDValue getCondCode(ISD::CondCode Cond)
bool MaskedValueIsZero(SDValue Op, const APInt &Mask, unsigned Depth=0) const
Return true if 'Op & Mask' is known to be zero.
SDValue getObjectPtrOffset(const SDLoc &SL, SDValue Ptr, TypeSize Offset)
Create an add instruction with appropriate flags when used for addressing some offset of an object.
Definition: SelectionDAG.h:906
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
LLVMContext * getContext() const
Definition: SelectionDAG.h:447
SDValue getMCSymbol(MCSymbol *Sym, EVT VT)
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:516
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:441
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This SDNode is used to implement the code generator support for the llvm IR shufflevector instruction...
int getMaskElt(unsigned Idx) const
ArrayRef< int > getMask() const
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:383
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:136
const_iterator begin() const
Definition: SmallSet.h:225
size_type count(const T &V) const
count - Return 1 if the element is in the set, 0 otherwise.
Definition: SmallSet.h:166
void clear()
Definition: SmallSet.h:220
std::pair< NoneType, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:182
const_iterator end() const
Definition: SmallSet.h:231
LLVM_NODISCARD bool empty() const
Definition: SmallVector.h:73
size_t size() const
Definition: SmallVector.h:70
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:554
LLVM_NODISCARD T pop_back_val()
Definition: SmallVector.h:654
void push_back(const T &Elt)
Definition: SmallVector.h:400
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1183
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:58
std::enable_if_t< std::numeric_limits< T >::is_signed, bool > getAsInteger(unsigned Radix, T &Result) const
Parse the current string as an integer of the specified radix.
Definition: StringRef.h:510
constexpr LLVM_NODISCARD size_t size() const
size - Get the string size.
Definition: StringRef.h:157
LLVM_NODISCARD const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:149
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
LLVM_NODISCARD R Default(T Value)
Definition: StringSwitch.h:183
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
Class to represent struct types.
Definition: DerivedTypes.h:213
Information about stack frame layout on the target.
unsigned getStackAlignment() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
Provides information about what library functions are available for the current target.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
bool PredictableSelectIsExpensive
Tells the code generator that select is more expensive than a branch if the branch is usually predict...
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
virtual bool shouldExpandBuildVectorWithShuffles(EVT, unsigned DefinedValues) const
unsigned MaxStoresPerMemcpyOptSize
Likewise for functions with the OptSize attribute.
MachineBasicBlock * emitPatchPoint(MachineInstr &MI, MachineBasicBlock *MBB) const
Replace/modify any TargetFrameIndex operands with a target-dependent sequence of memory operands that...
const TargetMachine & getTargetMachine() const
void setIndexedStoreAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed store does or does not work with the specified type and indicate ...
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual MVT getVectorIdxTy(const DataLayout &DL) const
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
unsigned MaxLoadsPerMemcmp
Specify maximum number of load instructions per memcmp call.
virtual bool isZExtFree(Type *FromTy, Type *ToTy) const
Return true if any actual instruction that defines a value of type FromTy implicitly zero-extends the...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
void setPrefLoopAlignment(Align Alignment)
Set the target's preferred loop alignment.
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
Sched::Preference getSchedulingPreference() const
Return target scheduling preference.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
bool isOperationCustom(unsigned Op, EVT VT) const
Return true if the operation uses custom lowering, regardless of whether the type is legal or not.
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
unsigned MaxStoresPerMemsetOptSize
Likewise for functions with the OptSize attribute.
bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const
When splitting a value of the specified type into parts, does the Lo or Hi part come first?...
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
unsigned MaxStoresPerMemmove
Specify maximum number of store instructions per memmove call.
virtual Align getPrefLoopAlignment(MachineLoop *ML=nullptr) const
Return the preferred loop alignment.
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL, bool LegalTypes=true) const
Returns the type for the shift amount of a shift opcode.
unsigned MaxStoresPerMemmoveOptSize
Likewise for functions with the OptSize attribute.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setPrefFunctionAlignment(Align Alignment)
Set the target's preferred function alignment.
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
unsigned MaxStoresPerMemset
Specify maximum number of store instructions per memset call.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
bool isOperationLegalOrCustom(unsigned Op, EVT VT, bool LegalOnly=false) const
Return true if the specified operation is legal on this target or can be made legal with custom lower...
unsigned MaxLoadsPerMemcmpOptSize
Likewise for functions with the OptSize attribute.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT)
If Opc/OrigVT is specified as being promoted, the promotion code defaults to trying a larger integer/...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
NegatibleCost
Enum that specifies when a float negation is beneficial.
std::vector< ArgListEntry > ArgListTy
void setHasMultipleConditionRegisters(bool hasManyRegs=true)
Tells the code generator that the target has multiple (allocatable) condition registers that can be u...
unsigned MaxStoresPerMemcpy
Specify maximum number of store instructions per memcpy call.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
void setJumpIsExpensive(bool isExpensive=true)
Tells the code generator not to expand logic operations on comparison predicates into separate sequen...
void setIndexedLoadAction(unsigned IdxMode, MVT VT, LegalizeAction Action)
Indicate that the specified indexed load does or does not work with the specified type and indicate w...
virtual MCSymbol * getFunctionEntryPointSymbol(const GlobalValue *Func, const TargetMachine &TM) const
If supported, return the function entry point symbol.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const
virtual bool useLoadStackGuardNode() const
If this function returns true, SelectionDAGBuilder emits a LOAD_STACK_GUARD node when it is lowering ...
void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, const SDLoc &DL, const SDValue OldLHS, const SDValue OldRHS) const
Soften the operands of a comparison.
std::pair< SDValue, SDValue > makeLibCall(SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef< SDValue > Ops, MakeLibCallOptions CallOptions, const SDLoc &dl, SDValue Chain=SDValue()) const
Returns a pair of (return value, chain).
SDValue getCheaperNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, unsigned Depth=0) const
This is the helper function to return the newly negated expression only when the cost is cheaper.
virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, std::vector< SDValue > &Ops, SelectionDAG &DAG) const
Lower the specified operand into the Ops vector.
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, SelectionDAG &DAG) const
Lower TLS global address SDNode for target independent emulated TLS model.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual SDValue getNegatedExpression(SDValue Op, SelectionDAG &DAG, bool LegalOps, bool OptForSize, NegatibleCost &Cost, unsigned Depth=0) const
Return the newly negated expression if the cost is not expensive and set the cost in Cost to indicate...
virtual ConstraintWeight getSingleConstraintMatchWeight(AsmOperandInfo &info, const char *constraint) const
Examine constraint string and operand type and determine a weight value.
virtual SDValue getSqrtInputTest(SDValue Operand, SelectionDAG &DAG, const DenormalMode &Mode) const
Return a target-dependent comparison result if the input operand is suitable for use with a square ro...
virtual SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const
Returns relocation base for the given PIC jumptable.
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual SDValue getSqrtResultForDenormInput(SDValue Operand, SelectionDAG &DAG) const
Return a target-dependent result if the input operand is not suitable for use with a square root esti...
virtual bool isGAPlusOffset(SDNode *N, const GlobalValue *&GA, int64_t &Offset) const
Returns true (and the GlobalValue and the offset) if the node is a GlobalAddress + offset.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:80
bool useEmulatedTLS() const
Returns true if this target uses emulated TLS.
Reloc::Model getRelocationModel() const
Returns the code generation relocation model.
CodeModel::Model getCodeModel() const
Returns the code model.
TargetOptions Options
bool shouldAssumeDSOLocal(const Module &M, const GlobalValue *GV) const
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
unsigned NoInfsFPMath
NoInfsFPMath - This flag is enabled when the -enable-no-infs-fp-math flag is specified on the command...
unsigned NoNaNsFPMath
NoNaNsFPMath - This flag is enabled when the -enable-no-nans-fp-math flag is specified on the command...
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Target - Wrapper for Target specific information.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:83
static TypeSize Fixed(ScalarTy MinVal)
Definition: TypeSize.h:422
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:226
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition: Type.h:147
static Type * getVoidTy(LLVMContext &C)
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition: Type.h:308
@ FloatTyID
32-bit floating point type
Definition: Type.h:58
@ DoubleTyID
64-bit floating point type
Definition: Type.h:59
@ FP128TyID
128-bit floating point type (112-bit significand)
Definition: Type.h:61
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
Definition: Type.h:150
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:190
TypeID getTypeID() const
Return the type id for the type.
Definition: Type.h:135
static IntegerType * getInt64Ty(LLVMContext &C)
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getPointerElementType() const
This method is deprecated without replacement.
Definition: Type.h:371
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
Value * getOperand(unsigned i) const
Definition: User.h:169
unsigned getNumOperands() const
Definition: User.h:191
LLVM Value Representation.
Definition: Value.h:74
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition: Value.h:434
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
Base class of all SIMD vector types.
Definition: DerivedTypes.h:389
Iterator for intrusive lists based on ilist_node.
self_iterator getIterator()
Definition: ilist_node.h:82
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:702
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ STACKRESTORE
STACKRESTORE has two operands, an input chain and a pointer to restore to it returns an output chain.
Definition: ISDOpcodes.h:1016
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1012
@ TargetConstantPool
Definition: ISDOpcodes.h:168
@ STRICT_FSETCC
STRICT_FSETCC/STRICT_FSETCCS - Constrained versions of SETCC, used for floating-point operands only.
Definition: ISDOpcodes.h:462
@ DELETED_NODE
DELETED_NODE - This is an illegal value that is used to catch errors.
Definition: ISDOpcodes.h:44
@ FLT_ROUNDS_
Returns current rounding mode: -1 Undefined 0 Round to 0 1 Round to nearest, ties to even 2 Round to ...
Definition: ISDOpcodes.h:829
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:147
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:666
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1045
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1122
@ STRICT_FCEIL
Definition: ISDOpcodes.h:412
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
@ FMAXNUM_IEEE
Definition: ISDOpcodes.h:910
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:925
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:732
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:466
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:739
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:519
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:377
@ ABS
ABS - Determine the unsigned absolute value of a signed integer value of the same bitwidth.
Definition: ISDOpcodes.h:640
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ STRICT_FSETCCS
Definition: ISDOpcodes.h:463
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:862
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:852
@ BUILD_PAIR
BUILD_PAIR - This is the opposite of EXTRACT_ELEMENT in some ways.
Definition: ISDOpcodes.h:229
@ INIT_TRAMPOLINE
INIT_TRAMPOLINE - This corresponds to the init_trampoline intrinsic.
Definition: ISDOpcodes.h:1083
@ STRICT_FSQRT
Constrained versions of libm-equivalent floating point intrinsics.
Definition: ISDOpcodes.h:398
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:726
@ STRICT_UINT_TO_FP
Definition: ISDOpcodes.h:436
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:583
@ TargetExternalSymbol
Definition: ISDOpcodes.h:169
@ BR
Control flow instructions. These all have token chains.
Definition: ISDOpcodes.h:941
@ TargetJumpTable
Definition: ISDOpcodes.h:167
@ PREFETCH
PREFETCH - This corresponds to a prefetch intrinsic.
Definition: ISDOpcodes.h:1105
@ FSINCOS
FSINCOS - Compute both fsin and fcos as a single operation.
Definition: ISDOpcodes.h:919
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:870
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:967
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:950
@ SSUBSAT
RESULT = [US]SUBSAT(LHS, RHS) - Perform saturation subtraction on 2 integers with the same bit width ...
Definition: ISDOpcodes.h:336
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:679
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1118
@ EXTRACT_ELEMENT
EXTRACT_ELEMENT - This is used to get the lower or upper (determined by a Constant,...
Definition: ISDOpcodes.h:222
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1041
@ TargetGlobalAddress
TargetGlobalAddress - Like GlobalAddress, but the DAG does no folding or anything else with this node...
Definition: ISDOpcodes.h:164
@ STRICT_FTRUNC
Definition: ISDOpcodes.h:416
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:614
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:657
@ VECTOR_SHUFFLE
VECTOR_SHUFFLE(VEC1, VEC2) - Returns a vector, of the same type as VEC1/VEC2.
Definition: ISDOpcodes.h:563
@ EXTRACT_SUBVECTOR
EXTRACT_SUBVECTOR(VECTOR, IDX) - Returns a subvector from VECTOR.
Definition: ISDOpcodes.h:549
@ FMINNUM_IEEE
FMINNUM_IEEE/FMAXNUM_IEEE - Perform floating-point minimum or maximum on two values,...
Definition: ISDOpcodes.h:909
@ STRICT_FMAXNUM
Definition: ISDOpcodes.h:410
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:511
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:729
@ STRICT_FMINNUM
Definition: ISDOpcodes.h:411
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:694
@ ATOMIC_CMP_SWAP
Val, OUTCHAIN = ATOMIC_CMP_SWAP(INCHAIN, ptr, cmp, swap) For double-word atomic operations: ValLo,...
Definition: ISDOpcodes.h:1129
@ FMINNUM
FMINNUM/FMAXNUM - Perform floating-point minimum or maximum on two values.
Definition: ISDOpcodes.h:902
@ DYNAMIC_STACKALLOC
DYNAMIC_STACKALLOC - Allocate some number of bytes on the stack aligned to a specified boundary.
Definition: ISDOpcodes.h:935
@ ConstantPool
Definition: ISDOpcodes.h:82
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:747
@ SMIN
[US]{MIN/MAX} - Binary minimum or maximum of signed or unsigned integers.
Definition: ISDOpcodes.h:626
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:837
@ STRICT_FROUND
Definition: ISDOpcodes.h:414
@ VSELECT
Select with a vector condition (op #0) and two vector operands (ops #1 and #2), returning a vector re...
Definition: ISDOpcodes.h:688
@ STRICT_SINT_TO_FP
STRICT_[US]INT_TO_FP - Convert a signed or unsigned integer to a floating point value.
Definition: ISDOpcodes.h:435
@ STRICT_FFLOOR
Definition: ISDOpcodes.h:413
@ INLINEASM_BR
INLINEASM_BR - Branching version of inline asm. Used by asm-goto.
Definition: ISDOpcodes.h:987
@ EH_DWARF_CFA
EH_DWARF_CFA - This node represents the pointer to the DWARF Canonical Frame Address (CFA),...
Definition: ISDOpcodes.h:129
@ FRAMEADDR
FRAMEADDR, RETURNADDR - These nodes represent llvm.frameaddress and llvm.returnaddress on the DAG.
Definition: ISDOpcodes.h:94
@ STRICT_FP_TO_UINT
Definition: ISDOpcodes.h:429
@ STRICT_FP_ROUND
X = STRICT_FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision ...
Definition: ISDOpcodes.h:451
@ STRICT_FP_TO_SINT
STRICT_FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:428
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:785
@ READCYCLECOUNTER
READCYCLECOUNTER - This corresponds to the readcyclecounter intrinsic.
Definition: ISDOpcodes.h:1072
@ STRICT_FP_EXTEND
X = STRICT_FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:456
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:632
@ TRAP
TRAP - Trapping instruction.
Definition: ISDOpcodes.h:1092
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
@ STRICT_FADD
Constrained versions of the binary floating point operators.
Definition: ISDOpcodes.h:387
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:500
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:818
@ ZERO_EXTEND_VECTOR_INREG
ZERO_EXTEND_VECTOR_INREG(Vector) - This operator represents an in-register zero-extension of the low ...
Definition: ISDOpcodes.h:780
@ INLINEASM
INLINEASM - Represents an inline asm block.
Definition: ISDOpcodes.h:984
@ STRICT_FNEARBYINT
Definition: ISDOpcodes.h:409
@ EH_SJLJ_SETJMP
RESULT, OUTCHAIN = EH_SJLJ_SETJMP(INCHAIN, buffer) This corresponds to the eh.sjlj....
Definition: ISDOpcodes.h:141
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:735
@ VAARG
VAARG - VAARG has four operands: an input chain, a pointer, a SRCVALUE, and the alignment.
Definition: ISDOpcodes.h:1036
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:960
@ BlockAddress
Definition: ISDOpcodes.h:84
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:715
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:476
@ SADDSAT
RESULT = [US]ADDSAT(LHS, RHS) - Perform saturation addition on 2 integers with the same bit width (W)...
Definition: ISDOpcodes.h:327
@ AssertZext
Definition: ISDOpcodes.h:62
@ CALLSEQ_START
CALLSEQ_START/CALLSEQ_END - These operators mark the beginning and end of a call sequence,...
Definition: ISDOpcodes.h:1030
@ STRICT_FRINT
Definition: ISDOpcodes.h:408
@ GET_DYNAMIC_AREA_OFFSET
GET_DYNAMIC_AREA_OFFSET - get offset from native SP to the address of the most recent dynamic alloca.
Definition: ISDOpcodes.h:1197
@ ADJUST_TRAMPOLINE
ADJUST_TRAMPOLINE - This corresponds to the adjust_trampoline intrinsic.
Definition: ISDOpcodes.h:1089
@ INTRINSIC_W_CHAIN
RESULT,OUTCHAIN = INTRINSIC_W_CHAIN(INCHAIN, INTRINSICID, arg1, ...) This node represents a target in...
Definition: ISDOpcodes.h:192
@ TargetGlobalTLSAddress
Definition: ISDOpcodes.h:165
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:491
bool isNormalStore(const SDNode *N)
Returns true if the specified node is a non-truncating and unindexed store.
bool isZEXTLoad(const SDNode *N)
Returns true if the specified node is a ZEXTLOAD.
bool isUNINDEXEDLoad(const SDNode *N)
Returns true if the specified node is an unindexed load.
bool isEXTLoad(const SDNode *N)
Returns true if the specified node is a EXTLOAD.
bool isBuildVectorAllZeros(const SDNode *N)
Return true if the specified node is a BUILD_VECTOR where all of the elements are 0 or undef.
bool isSignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs a signed comparison when used with integer o...
Definition: ISDOpcodes.h:1403
MemIndexedMode
MemIndexedMode enum - This enum defines the load / store indexed addressing modes.
Definition: ISDOpcodes.h:1319
bool isSEXTLoad(const SDNode *N)
Returns true if the specified node is a SEXTLOAD.
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1370
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1350
bool isUnsignedIntSetCC(CondCode Code)
Return true if this is a setcc instruction that performs an unsigned comparison when used with intege...
Definition: ISDOpcodes.h:1409
bool isNormalLoad(const SDNode *N)
Returns true if the specified node is a non-extending and unindexed load.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1398
@ Bitcast
Perform the operation on a different, but equivalently sized type.
LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
Flag
These should be considered private to the implementation of the MCInstrDesc class.
Definition: MCInstrDesc.h:146
@ VecShuffle
Definition: NVPTX.h:88
@ MO_GOT_TPREL_PCREL_FLAG
MO_GOT_TPREL_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:150
@ MO_TLSGDM_FLAG
MO_TLSGDM_FLAG - If this bit is set the symbol reference is relative to the region handle of TLS Gene...
Definition: PPC.h:135
@ MO_PCREL_FLAG
MO_PCREL_FLAG - If this bit is set, the symbol reference is relative to the current instruction addre...
Definition: PPC.h:109
@ MO_GOT_FLAG
MO_GOT_FLAG - If this bit is set the symbol reference is to be computed via the GOT.
Definition: PPC.h:114
@ MO_TPREL_HA
Definition: PPC.h:160
@ MO_PLT
On a symbol operand "FOO", this indicates that the reference is actually to "FOO@plt".
Definition: PPC.h:101
@ MO_TLS
Definition: PPC.h:169
@ MO_TPREL_FLAG
MO_TPREL_FLAG - If this bit is set the symbol reference is relative to TLS Initial Exec model.
Definition: PPC.h:127
@ MO_TPREL_LO
Definition: PPC.h:159
@ MO_LO
MO_LO, MO_HA - lo16(symbol) and ha16(symbol)
Definition: PPC.h:156
@ MO_GOT_TLSLD_PCREL_FLAG
MO_GOT_TLSLD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:145
@ MO_TLSGD_FLAG
MO_TLSGD_FLAG - If this bit is set the symbol reference is relative to TLS General Dynamic model for ...
Definition: PPC.h:123
@ MO_GOT_TLSGD_PCREL_FLAG
MO_GOT_TLSGD_PCREL_FLAG - A combination of flags, if these bits are set they should produce the reloc...
Definition: PPC.h:140
@ MO_HA
Definition: PPC.h:157
@ MO_PIC_FLAG
MO_PIC_FLAG - If this bit is set, the symbol reference is relative to the function's picbase,...
Definition: PPC.h:105
@ SEXT_LD_SPLAT
VSRC, CHAIN = SEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that sign-extends.
@ FCTIDUZ
Newer FCTI[D,W]UZ floating-point-to-integer conversion instructions for unsigned integers with round ...
@ ADDI_TLSGD_L_ADDR
G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSGD_L and GET_TLS_ADDR unti...
@ FSQRT
Square root instruction.
@ STRICT_FCFID
Constrained integer-to-floating-point conversion instructions.
@ DYNALLOC
The following two target-specific nodes are used for calls through function pointers in the 64-bit SV...
@ COND_BRANCH
CHAIN = COND_BRANCH CHAIN, CRRC, OPC, DESTBB [, INFLAG] - This corresponds to the COND_BRANCH pseudo ...
@ VABSD
An SDNode for Power9 vector absolute value difference.
@ CALL_RM
The variants that implicitly define rounding mode for calls with strictfp semantics.
@ STORE_VEC_BE
CHAIN = STORE_VEC_BE CHAIN, VSRC, Ptr - Occurs only for little endian.
@ BDNZ
CHAIN = BDNZ CHAIN, DESTBB - These are used to create counter-based loops.
@ MTVSRZ
Direct move from a GPR to a VSX register (zero)
@ SRL
These nodes represent PPC shifts.
@ VECINSERT
VECINSERT - The PPC vector insert instruction.
@ LXSIZX
GPRC, CHAIN = LXSIZX, CHAIN, Ptr, ByteWidth - This is a load of an integer smaller than 64 bits into ...
@ FNMSUB
FNMSUB - Negated multiply-subtract instruction.
@ RFEBB
CHAIN = RFEBB CHAIN, State - Return from event-based branch.
@ FCTIDZ
FCTI[D,W]Z - The FCTIDZ and FCTIWZ instructions, taking an f32 or f64 operand, producing an f64 value...
@ SC
CHAIN = SC CHAIN, Imm128 - System call.
@ GET_TLS_ADDR
x3 = GET_TLS_ADDR x3, Symbol - For the general-dynamic TLS model, produces a call to __tls_get_addr(s...
@ FP_TO_UINT_IN_VSR
Floating-point-to-integer conversion instructions.
@ XXSPLTI32DX
XXSPLTI32DX - The PPC XXSPLTI32DX instruction.
@ ANDI_rec_1_EQ_BIT
i1 = ANDI_rec_1_[EQ|GT]_BIT(i32 or i64 x) - Represents the result of the eq or gt bit of CR0 after ex...
@ FRE
Reciprocal estimate instructions (unary FP ops).
@ ADDIS_GOT_TPREL_HA
G8RC = ADDIS_GOT_TPREL_HA x2, Symbol - Used by the initial-exec TLS model, produces an ADDIS8 instruc...
@ CLRBHRB
CHAIN = CLRBHRB CHAIN - Clear branch history rolling buffer.
@ SINT_VEC_TO_FP
Extract a subvector from signed integer vector and convert to FP.
@ EXTRACT_SPE
Extract SPE register component, second argument is high or low.
@ XXSWAPD
VSRC, CHAIN = XXSWAPD CHAIN, VSRC - Occurs only for little endian.
@ ADDI_TLSLD_L_ADDR
G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that combines ADDI_TLSLD_L and GET_TLSLD_ADDR un...
@ ATOMIC_CMP_SWAP_8
ATOMIC_CMP_SWAP - the exact same as the target-independent nodes except they ensure that the compare ...
@ ST_VSR_SCAL_INT
Store scalar integers from VSR.
@ VCMP
RESVEC = VCMP(LHS, RHS, OPC) - Represents one of the altivec VCMP* instructions.
@ BCTRL
CHAIN,FLAG = BCTRL(CHAIN, INFLAG) - Directly corresponds to a BCTRL instruction.
@ BUILD_SPE64
BUILD_SPE64 and EXTRACT_SPE are analogous to BUILD_PAIR and EXTRACT_ELEMENT but take f64 arguments in...
@ LFIWZX
GPRC, CHAIN = LFIWZX CHAIN, Ptr - This is a floating-point load which zero-extends from a 32-bit inte...
@ SCALAR_TO_VECTOR_PERMUTED
PowerPC instructions that have SCALAR_TO_VECTOR semantics tend to place the value into the least sign...
@ EXTRACT_VSX_REG
EXTRACT_VSX_REG = Extract one of the underlying vsx registers of an accumulator or pair register.
@ STXSIX
STXSIX - The STXSI[bh]X instruction.
@ MAT_PCREL_ADDR
MAT_PCREL_ADDR = Materialize a PC Relative address.
@ MFOCRF
R32 = MFOCRF(CRREG, INFLAG) - Represents the MFOCRF instruction.
@ XXSPLT
XXSPLT - The PPC VSX splat instructions.
@ TOC_ENTRY
GPRC = TOC_ENTRY GA, TOC Loads the entry for GA from the TOC, where the TOC base is given by the last...
@ XXPERMDI
XXPERMDI - The PPC XXPERMDI instruction.
@ ADDIS_DTPREL_HA
G8RC = ADDIS_DTPREL_HA x3, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction t...
@ ADD_TLS
G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS model, produces an ADD instruction that ...
@ MTVSRA
Direct move from a GPR to a VSX register (algebraic)
@ VADD_SPLAT
VRRC = VADD_SPLAT Elt, EltSize - Temporary node to be expanded during instruction selection to optimi...
@ PPC32_GOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ ADDI_DTPREL_L
G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction ...
@ BCTRL_LOAD_TOC
CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl instruction and the TOC reload r...
@ PPC32_PICGOT
GPRC = address of GLOBAL_OFFSET_TABLE.
@ FCFID
FCFID - The FCFID instruction, taking an f64 operand and producing an f64 value containing the FP re...
@ CR6SET
ch, gl = CR6[UN]SET ch, inglue - Toggle CR bit 6 for SVR4 vararg calls
@ LBRX
GPRC, CHAIN = LBRX CHAIN, Ptr, Type - This is a byte-swapping load instruction.
@ LD_VSX_LH
VSRC, CHAIN = LD_VSX_LH CHAIN, Ptr - This is a floating-point load of a v2f32 value into the lower ha...
@ PROBED_ALLOCA
To avoid stack clash, allocation is performed by block and each block is probed.
@ XXMFACC
XXMFACC = This corresponds to the xxmfacc instruction.
@ ADDIS_TLSGD_HA
G8RC = ADDIS_TLSGD_HA x2, Symbol - For the general-dynamic TLS model, produces an ADDIS8 instruction ...
@ ACC_BUILD
ACC_BUILD = Build an accumulator register from 4 VSX registers.
@ GlobalBaseReg
The result of the mflr at function entry, used for PIC code.
@ LXVD2X
VSRC, CHAIN = LXVD2X_LE CHAIN, Ptr - Occurs only for little endian.
@ CALL
CALL - A direct function call.
@ MTCTR
CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a MTCTR instruction.
@ TC_RETURN
TC_RETURN - A tail call return.
@ STFIWX
STFIWX - The STFIWX instruction.
@ LD_SPLAT
VSRC, CHAIN = LD_SPLAT, CHAIN, Ptr - a splatting load memory instruction such as LXVDSX,...
@ VCMP_rec
RESVEC, OUTFLAG = VCMP_rec(LHS, RHS, OPC) - Represents one of the altivec VCMP*_rec instructions.
@ MFFS
F8RC = MFFS - This moves the FPSCR (not modeled) into the register.
@ PADDI_DTPREL
G8RC = PADDI_DTPREL x3, Symbol - For the pc-rel based local-dynamic TLS model, produces a PADDI8 inst...
@ BUILD_FP128
Direct move of 2 consecutive GPR to a VSX register.
@ VEXTS
VEXTS, ByteWidth - takes an input in VSFRC and produces an output in VSFRC that is sign-extended from...
@ TLS_LOCAL_EXEC_MAT_ADDR
TLS_LOCAL_EXEC_MAT_ADDR = Materialize an address for TLS global address when using local exec access ...
@ VPERM
VPERM - The PPC VPERM Instruction.
@ ADDIS_TLSLD_HA
G8RC = ADDIS_TLSLD_HA x2, Symbol - For the local-dynamic TLS model, produces an ADDIS8 instruction th...
@ XXSPLTI_SP_TO_DP
XXSPLTI_SP_TO_DP - The PPC VSX splat instructions for immediates for converting immediate single prec...
@ GET_TLSLD_ADDR
x3 = GET_TLSLD_ADDR x3, Symbol - For the local-dynamic TLS model, produces a call to __tls_get_addr(s...
@ ADDI_TLSGD_L
x3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS model, produces an ADDI8 instruction t...
@ DYNAREAOFFSET
This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to compute an offset from native ...
@ PAIR_BUILD
PAIR_BUILD = Build a vector pair register from 2 VSX registers.
@ STRICT_FADDRTZ
Constrained floating point add in round-to-zero mode.
@ FTSQRT
Test instruction for software square root.
@ FP_EXTEND_HALF
FP_EXTEND_HALF(VECTOR, IDX) - Custom extend upper (IDX=0) half or lower (IDX=1) half of v4f32 to v2f6...
@ RET_FLAG
Return with a flag operand, matched by 'blr'.
@ CMPB
The CMPB instruction (takes two operands of i32 or i64).
@ VECSHL
VECSHL - The PPC vector shift left instruction.
@ ADDI_TLSLD_L
x3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS model, produces an ADDI8 instruction tha...
@ FADDRTZ
F8RC = FADDRTZ F8RC, F8RC - This is an FADD done with rounding towards zero.
@ ZEXT_LD_SPLAT
VSRC, CHAIN = ZEXT_LD_SPLAT, CHAIN, Ptr - a splatting load memory that zero-extends.
@ XSMAXCDP
XSMAXCDP, XSMINCDP - C-type min/max instructions.
@ SRA_ADDZE
The combination of sra[wd]i and addze used to implemented signed integer division by a power of 2.
@ EXTSWSLI
EXTSWSLI = The PPC extswsli instruction, which does an extend-sign word and shift left immediate.
@ STXVD2X
CHAIN = STXVD2X CHAIN, VSRC, Ptr - Occurs only for little endian.
@ TLSGD_AIX
GPRC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY G8RC = TLSGD_AIX, TOC_ENTRY, TOC_ENTRY Op that combines two re...
@ UINT_VEC_TO_FP
Extract a subvector from unsigned integer vector and convert to FP.
@ LXVRZX
LXVRZX - Load VSX Vector Rightmost and Zero Extend This node represents v1i128 BUILD_VECTOR of a zero...
@ MFBHRBE
GPRC, CHAIN = MFBHRBE CHAIN, Entry, Dummy - Move from branch history rolling buffer entry.
@ FCFIDU
Newer FCFID[US] integer-to-floating-point conversion instructions for unsigned integers and single-pr...
@ FSEL
FSEL - Traditional three-operand fsel node.
@ SWAP_NO_CHAIN
An SDNode for swaps that are not associated with any loads/stores and thereby have no chain.
@ LOAD_VEC_BE
VSRC, CHAIN = LOAD_VEC_BE CHAIN, Ptr - Occurs only for little endian.
@ LFIWAX
GPRC, CHAIN = LFIWAX CHAIN, Ptr - This is a floating-point load which sign-extends from a 32-bit inte...
@ STBRX
CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a byte-swapping store instruction.
@ LD_GOT_TPREL_L
G8RC = LD_GOT_TPREL_L Symbol, G8RReg - Used by the initial-exec TLS model, produces a LD instruction ...
@ MFVSR
Direct move from a VSX register to a GPR.
@ TLS_DYNAMIC_MAT_PCREL_ADDR
TLS_DYNAMIC_MAT_PCREL_ADDR = Materialize a PC Relative address for TLS global address when using dyna...
@ Hi
Hi/Lo - These represent the high and low 16-bit parts of a global address respectively.
Predicate
Predicate - These are "(BI << 5) | BO" for various predicates.
Definition: PPCPredicates.h:26
SDValue get_VSPLTI_elt(SDNode *N, unsigned ByteSize, SelectionDAG &DAG)
get_VSPLTI_elt - If this is a build_vector of constants which can be formed by using a vspltis[bhw] i...
bool isXXBRDShuffleMask(ShuffleVectorSDNode *N)
isXXBRDShuffleMask - Return true if this is a shuffle mask suitable for a XXBRD instruction.
FastISel * createFastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo)
@ DIR_PWR_FUTURE
Definition: PPCSubtarget.h:64
bool isVMRGHShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGHShuffleMask - Return true if this is a shuffle mask suitable for a VRGH* instruction with the ...
bool isVPKUDUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUDUMShuffleMask - Return true if this is the shuffle mask for a VPKUDUM instruction.
bool isVMRGEOShuffleMask(ShuffleVectorSDNode *N, bool CheckEven, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGEOShuffleMask - Return true if this is a shuffle mask suitable for a VMRGEW or VMRGOW instructi...
bool isXXBRQShuffleMask(ShuffleVectorSDNode *N)
isXXBRQShuffleMask - Return true if this is a shuffle mask suitable for a XXBRQ instruction.
bool isXXBRWShuffleMask(ShuffleVectorSDNode *N)
isXXBRWShuffleMask - Return true if this is a shuffle mask suitable for a XXBRW instruction.
bool isXXPERMDIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXPERMDIShuffleMask - Return true if this is a shuffle mask suitable for a XXPERMDI instruction.
bool isXXBRHShuffleMask(ShuffleVectorSDNode *N)
isXXBRHShuffleMask - Return true if this is a shuffle mask suitable for a XXBRH instruction.
unsigned getSplatIdxForPPCMnemonics(SDNode *N, unsigned EltSize, SelectionDAG &DAG)
getSplatIdxForPPCMnemonics - Return the splat index as a value that is appropriate for PPC mnemonics ...
bool isXXSLDWIShuffleMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, bool &Swap, bool IsLE)
isXXSLDWIShuffleMask - Return true if this is a shuffle mask suitable for a XXSLDWI instruction.
int isVSLDOIShuffleMask(SDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVSLDOIShuffleMask - If this is a vsldoi shuffle mask, return the shift amount, otherwise return -1.
bool isVMRGLShuffleMask(ShuffleVectorSDNode *N, unsigned UnitSize, unsigned ShuffleKind, SelectionDAG &DAG)
isVMRGLShuffleMask - Return true if this is a shuffle mask suitable for a VRGL* instruction with the ...
bool isXXINSERTWMask(ShuffleVectorSDNode *N, unsigned &ShiftElts, unsigned &InsertAtByte, bool &Swap, bool IsLE)
isXXINSERTWMask - Return true if this VECTOR_SHUFFLE can be handled by the XXINSERTW instruction intr...
bool isSplatShuffleMask(ShuffleVectorSDNode *N, unsigned EltSize)
isSplatShuffleMask - Return true if the specified VECTOR_SHUFFLE operand specifies a splat of a singl...
bool isVPKUWUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUWUMShuffleMask - Return true if this is the shuffle mask for a VPKUWUM instruction.
bool isVPKUHUMShuffleMask(ShuffleVectorSDNode *N, unsigned ShuffleKind, SelectionDAG &DAG)
isVPKUHUMShuffleMask - Return true if this is the shuffle mask for a VPKUHUM instruction.
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
@ Define
Register definition.
@ GeneralDynamic
Definition: CodeGen.h:43
@ FS
Definition: X86.h:188
Reg
All possible values of the reg field in the ModR/M byte.
@ XMC_PR
Program Code.
Definition: XCOFF.h:61
@ XTY_ER
External reference.
Definition: XCOFF.h:197
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:441
CodeModel::Model getCodeModel()
constexpr double e
Definition: MathExtras.h:57
@ BCTR
Definition: ELF.h:91
const_iterator end(StringRef path)
Get end iterator over path.
Definition: Path.cpp:236
\file This file defines the SmallVector class.
Definition: AllocatorList.h:22
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:474
static bool isIndirectCall(const MachineInstr &MI)
constexpr bool isUInt< 16 >(uint64_t x)
Definition: MathExtras.h:408
STATISTIC(NumFunctions, "Total number of functions")
bool checkConvertToNonDenormSingle(APFloat &ArgAPFloat)
bool CC_PPC64_ELF_FIS(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue peekThroughBitcasts(SDValue V)
Return the non-bitcasted source operand of V if it exists.
bool CC_PPC32_SVR4_ByVal(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
bool isAligned(Align Lhs, uint64_t SizeInBytes)
Checks that SizeInBytes is a multiple of the alignment.
Definition: Alignment.h:138
bool isIntS16Immediate(SDNode *N, int16_t &Imm)
isIntS16Immediate - This method tests to see if the node is either a 32-bit or 64-bit immediate,...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
bool CC_PPC32_SVR4_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isInt< 32 >(int64_t x)
Definition: MathExtras.h:373
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition: MathExtras.h:496
auto reverse(ContainerTy &&C, std::enable_if_t< has_rbegin< ContainerTy >::value > *=nullptr)
Definition: STLExtras.h:359
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:370
uint32_t FloatToBits(float Float)
This function takes a float and returns the bit equivalent 32-bit integer.
Definition: MathExtras.h:663
Expected< ExpressionValue > min(const ExpressionValue &Lhs, const ExpressionValue &Rhs)
Definition: FileCheck.cpp:357
unsigned M1(unsigned Val)
Definition: VE.h:371
bool isReleaseOrStronger(AtomicOrdering AO)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1591
uint64_t PowerOf2Floor(uint64_t A)
Returns the power of two which is less than or equal to the given value.
Definition: MathExtras.h:695
const NoneType None
Definition: None.h:24
bool RetCC_PPC_Cold(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:491
bool convertToNonDenormSingle(APInt &ArgAPInt)
constexpr size_t array_lengthof(T(&)[N])
Find the length of an array.
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition: MathExtras.h:156
bool CC_PPC32_SVR4(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
Align max(MaybeAlign Lhs, Align Rhs)
Definition: Alignment.h:340
bool RetCC_PPC(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool isIntS34Immediate(SDNode *N, int64_t &Imm)
isIntS34Immediate - This method tests if value of node given can be accurately represented as a sign ...
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:148
auto count(R &&Range, const E &Element)
Wrapper function around std::count to count the number of times an element Element occurs in the give...
Definition: STLExtras.h:1667
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:672
unsigned M0(unsigned Val)
Definition: VE.h:370
ConstantSDNode * isConstOrConstSplat(SDValue N, bool AllowUndefs=false, bool AllowTruncation=false)
Returns the SDNode if it is a constant splat BuildVector or constant int.
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition: MathExtras.h:761
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
OutputIt move(R &&Range, OutputIt Out)
Provide wrappers to std::move which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1642
Align commonAlignment(Align A, Align B)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:211
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:777
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
#define N
static const fltSemantics & IEEEsingle() LLVM_READNONE
Definition: APFloat.cpp:170
static constexpr roundingMode rmNearestTiesToEven
Definition: APFloat.h:189
static const fltSemantics & PPCDoubleDouble() LLVM_READNONE
Definition: APFloat.cpp:185
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Represent subnormal handling kind for floating point instruction inputs and outputs.
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:363
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:130
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:257
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition: ValueTypes.h:140
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:341
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:353
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:289
static EVT getIntegerVT(LLVMContext &Context, unsigned BitWidth)
Returns the EVT that represents an integer with the given number of bits.
Definition: ValueTypes.h:65
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
Definition: ValueTypes.h:349
std::string getEVTString() const
This function returns value type as a string, e.g. "i32".
Definition: ValueTypes.cpp:151
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:155
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:296
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:301
bool isExtended() const
Test if the given EVT is extended (as opposed to being simple).
Definition: ValueTypes.h:135
bool isScalarInteger() const
Return true if this is an integer, but not a vector.
Definition: ValueTypes.h:150
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:309
EVT getHalfNumVectorElementsVT(LLVMContext &Context) const
Definition: ValueTypes.h:415
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:145
bool isInConsecutiveRegs() const
unsigned getByValSize() const
bool isInConsecutiveRegsLast() const
void setByValSize(unsigned S)
Align getNonZeroByValAlign() const
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
OutputArg - This struct carries flags and a value for a single outgoing (actual) argument or outgoing...
bool isConstant() const
Returns true if we know the value of all bits.
Definition: KnownBits.h:50
void resetAll()
Resets the known state of all bits.
Definition: KnownBits.h:66
const APInt & getConstant() const
Returns the value when all bits have a known value.
Definition: KnownBits.h:57
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getGOT(MachineFunction &MF)
Return a MachinePointerInfo record that refers to a GOT entry.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:109
Structure that collects some common arguments that get passed around between the functions for call l...
These are IR-level optimization flags that may be propagated to SDNodes.
bool hasNoInfs() const
void setNoFPExcept(bool b)
bool hasNoNaNs() const
bool hasNoSignedZeros() const
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This represents an addressing mode of: BaseGV + BaseOffs + BaseReg + Scale*ScaleReg If BaseGV is null...
This contains information for each constraint that we are lowering.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals
SDValue CombineTo(SDNode *N, ArrayRef< SDValue > To, bool AddTo=true)